whiterose

linux unikernel
git clone https://git.ne02ptzero.me/git/whiterose

filter.c (225707B)


      1 /*
      2  * Linux Socket Filter - Kernel level socket filtering
      3  *
      4  * Based on the design of the Berkeley Packet Filter. The new
      5  * internal format has been designed by PLUMgrid:
      6  *
      7  *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
      8  *
      9  * Authors:
     10  *
     11  *	Jay Schulist <jschlst@samba.org>
     12  *	Alexei Starovoitov <ast@plumgrid.com>
     13  *	Daniel Borkmann <dborkman@redhat.com>
     14  *
     15  * This program is free software; you can redistribute it and/or
     16  * modify it under the terms of the GNU General Public License
     17  * as published by the Free Software Foundation; either version
     18  * 2 of the License, or (at your option) any later version.
     19  *
     20  * Andi Kleen - Fix a few bad bugs and races.
     21  * Kris Katterjohn - Added many additional checks in bpf_check_classic()
     22  */
     23 
     24 #include <linux/module.h>
     25 #include <linux/types.h>
     26 #include <linux/mm.h>
     27 #include <linux/fcntl.h>
     28 #include <linux/socket.h>
     29 #include <linux/sock_diag.h>
     30 #include <linux/in.h>
     31 #include <linux/inet.h>
     32 #include <linux/netdevice.h>
     33 #include <linux/if_packet.h>
     34 #include <linux/if_arp.h>
     35 #include <linux/gfp.h>
     36 #include <net/inet_common.h>
     37 #include <net/ip.h>
     38 #include <net/protocol.h>
     39 #include <net/netlink.h>
     40 #include <linux/skbuff.h>
     41 #include <linux/skmsg.h>
     42 #include <net/sock.h>
     43 #include <net/flow_dissector.h>
     44 #include <linux/errno.h>
     45 #include <linux/timer.h>
     46 #include <linux/uaccess.h>
     47 #include <asm/unaligned.h>
     48 #include <asm/cmpxchg.h>
     49 #include <linux/filter.h>
     50 #include <linux/ratelimit.h>
     51 #include <linux/seccomp.h>
     52 #include <linux/if_vlan.h>
     53 #include <linux/bpf.h>
     54 #include <net/sch_generic.h>
     55 #include <net/cls_cgroup.h>
     56 #include <net/dst_metadata.h>
     57 #include <net/dst.h>
     58 #include <net/sock_reuseport.h>
     59 #include <net/busy_poll.h>
     60 #include <net/tcp.h>
     61 #include <net/xfrm.h>
     62 #include <net/udp.h>
     63 #include <linux/bpf_trace.h>
     64 #include <net/xdp_sock.h>
     65 #include <linux/inetdevice.h>
     66 #include <net/inet_hashtables.h>
     67 #include <net/inet6_hashtables.h>
     68 #include <net/ip_fib.h>
     69 #include <net/flow.h>
     70 #include <net/arp.h>
     71 #include <net/ipv6.h>
     72 #include <net/net_namespace.h>
     73 #include <linux/seg6_local.h>
     74 #include <net/seg6.h>
     75 #include <net/seg6_local.h>
     76 #include <net/lwtunnel.h>
     77 
     78 /**
     79  *	sk_filter_trim_cap - run a packet through a socket filter
     80  *	@sk: sock associated with &sk_buff
     81  *	@skb: buffer to filter
     82  *	@cap: limit on how short the eBPF program may trim the packet
     83  *
     84  * Run the eBPF program and then cut skb->data to the correct size returned
     85  * by the program. If pkt_len is 0 we toss the packet. If skb->len is smaller
     86  * than pkt_len we keep the whole skb->data. This is the socket-level
     87  * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
     88  * be accepted or -EPERM if the packet should be tossed.
     89  *
     90  */
     91 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
     92 {
     93 	int err;
     94 	struct sk_filter *filter;
     95 
     96 	/*
     97 	 * If the skb was allocated from pfmemalloc reserves, only
     98 	 * allow SOCK_MEMALLOC sockets to use it as this socket is
     99 	 * helping free memory
    100 	 */
    101 	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
    102 		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
    103 		return -ENOMEM;
    104 	}
    105 	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
    106 	if (err)
    107 		return err;
    108 
    109 	err = security_sock_rcv_skb(sk, skb);
    110 	if (err)
    111 		return err;
    112 
    113 	rcu_read_lock();
    114 	filter = rcu_dereference(sk->sk_filter);
    115 	if (filter) {
    116 		struct sock *save_sk = skb->sk;
    117 		unsigned int pkt_len;
    118 
    119 		skb->sk = sk;
    120 		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
    121 		skb->sk = save_sk;
    122 		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
    123 	}
    124 	rcu_read_unlock();
    125 
    126 	return err;
    127 }
    128 EXPORT_SYMBOL(sk_filter_trim_cap);
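        /* Usage sketch (illustrative, not part of the original filter.c):
         * protocol code normally reaches this function through the sk_filter()
         * wrapper, which is equivalent to calling it with a cap of 1; UDP, for
         * instance, passes sizeof(struct udphdr) so a filter can never trim
         * away the transport header:
         *
         *	if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
         *		goto drop;
         */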
    129 
    130 BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
    131 {
    132 	return skb_get_poff(skb);
    133 }
    134 
    135 BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
    136 {
    137 	struct nlattr *nla;
    138 
    139 	if (skb_is_nonlinear(skb))
    140 		return 0;
    141 
    142 	if (skb->len < sizeof(struct nlattr))
    143 		return 0;
    144 
    145 	if (a > skb->len - sizeof(struct nlattr))
    146 		return 0;
    147 
    148 	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
    149 	if (nla)
    150 		return (void *) nla - (void *) skb->data;
    151 
    152 	return 0;
    153 }
    154 
    155 BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
    156 {
    157 	struct nlattr *nla;
    158 
    159 	if (skb_is_nonlinear(skb))
    160 		return 0;
    161 
    162 	if (skb->len < sizeof(struct nlattr))
    163 		return 0;
    164 
    165 	if (a > skb->len - sizeof(struct nlattr))
    166 		return 0;
    167 
    168 	nla = (struct nlattr *) &skb->data[a];
    169 	if (nla->nla_len > skb->len - a)
    170 		return 0;
    171 
    172 	nla = nla_find_nested(nla, x);
    173 	if (nla)
    174 		return (void *) nla - (void *) skb->data;
    175 
    176 	return 0;
    177 }
    178 
    179 BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
    180 	   data, int, headlen, int, offset)
    181 {
    182 	u8 tmp, *ptr;
    183 	const int len = sizeof(tmp);
    184 
    185 	if (offset >= 0) {
    186 		if (headlen - offset >= len)
    187 			return *(u8 *)(data + offset);
    188 		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
    189 			return tmp;
    190 	} else {
    191 		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
    192 		if (likely(ptr))
    193 			return *(u8 *)ptr;
    194 	}
    195 
    196 	return -EFAULT;
    197 }
    198 
    199 BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
    200 	   int, offset)
    201 {
    202 	return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len,
    203 					 offset);
    204 }
    205 
    206 BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
    207 	   data, int, headlen, int, offset)
    208 {
    209 	u16 tmp, *ptr;
    210 	const int len = sizeof(tmp);
    211 
    212 	if (offset >= 0) {
    213 		if (headlen - offset >= len)
    214 			return get_unaligned_be16(data + offset);
    215 		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
    216 			return be16_to_cpu(tmp);
    217 	} else {
    218 		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
    219 		if (likely(ptr))
    220 			return get_unaligned_be16(ptr);
    221 	}
    222 
    223 	return -EFAULT;
    224 }
    225 
    226 BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
    227 	   int, offset)
    228 {
    229 	return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len,
    230 					  offset);
    231 }
    232 
    233 BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
    234 	   data, int, headlen, int, offset)
    235 {
    236 	u32 tmp, *ptr;
    237 	const int len = sizeof(tmp);
    238 
    239 	if (likely(offset >= 0)) {
    240 		if (headlen - offset >= len)
    241 			return get_unaligned_be32(data + offset);
    242 		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
    243 			return be32_to_cpu(tmp);
    244 	} else {
    245 		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
    246 		if (likely(ptr))
    247 			return get_unaligned_be32(ptr);
    248 	}
    249 
    250 	return -EFAULT;
    251 }
    252 
    253 BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
    254 	   int, offset)
    255 {
    256 	return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len,
    257 					  offset);
    258 }
    259 
    260 BPF_CALL_0(bpf_get_raw_cpu_id)
    261 {
    262 	return raw_smp_processor_id();
    263 }
    264 
    265 static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
    266 	.func		= bpf_get_raw_cpu_id,
    267 	.gpl_only	= false,
    268 	.ret_type	= RET_INTEGER,
    269 };
    270 
    271 static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
    272 			      struct bpf_insn *insn_buf)
    273 {
    274 	struct bpf_insn *insn = insn_buf;
    275 
    276 	switch (skb_field) {
    277 	case SKF_AD_MARK:
    278 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
    279 
    280 		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
    281 				      offsetof(struct sk_buff, mark));
    282 		break;
    283 
    284 	case SKF_AD_PKTTYPE:
    285 		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
    286 		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
    287 #ifdef __BIG_ENDIAN_BITFIELD
    288 		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
    289 #endif
    290 		break;
    291 
    292 	case SKF_AD_QUEUE:
    293 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
    294 
    295 		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
    296 				      offsetof(struct sk_buff, queue_mapping));
    297 		break;
    298 
    299 	case SKF_AD_VLAN_TAG:
    300 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
    301 
    302 		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
    303 		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
    304 				      offsetof(struct sk_buff, vlan_tci));
    305 		break;
    306 	case SKF_AD_VLAN_TAG_PRESENT:
    307 		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET());
    308 		if (PKT_VLAN_PRESENT_BIT)
    309 			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT);
    310 		if (PKT_VLAN_PRESENT_BIT < 7)
    311 			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
    312 		break;
    313 	}
    314 
    315 	return insn - insn_buf;
    316 }
    317 
    318 static bool convert_bpf_extensions(struct sock_filter *fp,
    319 				   struct bpf_insn **insnp)
    320 {
    321 	struct bpf_insn *insn = *insnp;
    322 	u32 cnt;
    323 
    324 	switch (fp->k) {
    325 	case SKF_AD_OFF + SKF_AD_PROTOCOL:
    326 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
    327 
    328 		/* A = *(u16 *) (CTX + offsetof(protocol)) */
    329 		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
    330 				      offsetof(struct sk_buff, protocol));
    331 		/* A = ntohs(A) [emitting a nop or swap16] */
    332 		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
    333 		break;
    334 
    335 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
    336 		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
    337 		insn += cnt - 1;
    338 		break;
    339 
    340 	case SKF_AD_OFF + SKF_AD_IFINDEX:
    341 	case SKF_AD_OFF + SKF_AD_HATYPE:
    342 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
    343 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
    344 
    345 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
    346 				      BPF_REG_TMP, BPF_REG_CTX,
    347 				      offsetof(struct sk_buff, dev));
    348 		/* if (tmp != 0) goto pc + 1 */
    349 		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
    350 		*insn++ = BPF_EXIT_INSN();
    351 		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
    352 			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
    353 					    offsetof(struct net_device, ifindex));
    354 		else
    355 			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
    356 					    offsetof(struct net_device, type));
    357 		break;
    358 
    359 	case SKF_AD_OFF + SKF_AD_MARK:
    360 		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
    361 		insn += cnt - 1;
    362 		break;
    363 
    364 	case SKF_AD_OFF + SKF_AD_RXHASH:
    365 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
    366 
    367 		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
    368 				    offsetof(struct sk_buff, hash));
    369 		break;
    370 
    371 	case SKF_AD_OFF + SKF_AD_QUEUE:
    372 		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
    373 		insn += cnt - 1;
    374 		break;
    375 
    376 	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
    377 		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
    378 					 BPF_REG_A, BPF_REG_CTX, insn);
    379 		insn += cnt - 1;
    380 		break;
    381 
    382 	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
    383 		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
    384 					 BPF_REG_A, BPF_REG_CTX, insn);
    385 		insn += cnt - 1;
    386 		break;
    387 
    388 	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
    389 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
    390 
    391 		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
    392 		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
    393 				      offsetof(struct sk_buff, vlan_proto));
    394 		/* A = ntohs(A) [emitting a nop or swap16] */
    395 		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
    396 		break;
    397 
    398 	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
    399 	case SKF_AD_OFF + SKF_AD_NLATTR:
    400 	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
    401 	case SKF_AD_OFF + SKF_AD_CPU:
    402 	case SKF_AD_OFF + SKF_AD_RANDOM:
    403 		/* arg1 = CTX */
    404 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
    405 		/* arg2 = A */
    406 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
    407 		/* arg3 = X */
    408 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
    409 		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
    410 		switch (fp->k) {
    411 		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
    412 			*insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
    413 			break;
    414 		case SKF_AD_OFF + SKF_AD_NLATTR:
    415 			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
    416 			break;
    417 		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
    418 			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
    419 			break;
    420 		case SKF_AD_OFF + SKF_AD_CPU:
    421 			*insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
    422 			break;
    423 		case SKF_AD_OFF + SKF_AD_RANDOM:
    424 			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
    425 			bpf_user_rnd_init_once();
    426 			break;
    427 		}
    428 		break;
    429 
    430 	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
    431 		/* A ^= X */
    432 		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
    433 		break;
    434 
    435 	default:
    436 		/* This is just a dummy call to avoid letting the compiler
    437 		 * evict __bpf_call_base() as an optimization. Placed here
    438 		 * where no-one bothers.
    439 		 */
    440 		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
    441 		return false;
    442 	}
    443 
    444 	*insnp = insn;
    445 	return true;
    446 }
    447 
    448 static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
    449 {
    450 	const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
    451 	int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
    452 	bool endian = BPF_SIZE(fp->code) == BPF_H ||
    453 		      BPF_SIZE(fp->code) == BPF_W;
    454 	bool indirect = BPF_MODE(fp->code) == BPF_IND;
    455 	const int ip_align = NET_IP_ALIGN;
    456 	struct bpf_insn *insn = *insnp;
    457 	int offset = fp->k;
    458 
    459 	if (!indirect &&
    460 	    ((unaligned_ok && offset >= 0) ||
    461 	     (!unaligned_ok && offset >= 0 &&
    462 	      offset + ip_align >= 0 &&
    463 	      offset + ip_align % size == 0))) {
    464 		bool ldx_off_ok = offset <= S16_MAX;
    465 
    466 		*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
    467 		if (offset)
    468 			*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
    469 		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
    470 				      size, 2 + endian + (!ldx_off_ok * 2));
    471 		if (ldx_off_ok) {
    472 			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
    473 					      BPF_REG_D, offset);
    474 		} else {
    475 			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
    476 			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
    477 			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
    478 					      BPF_REG_TMP, 0);
    479 		}
    480 		if (endian)
    481 			*insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
    482 		*insn++ = BPF_JMP_A(8);
    483 	}
    484 
    485 	*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
    486 	*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
    487 	*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
    488 	if (!indirect) {
    489 		*insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
    490 	} else {
    491 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
    492 		if (fp->k)
    493 			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
    494 	}
    495 
    496 	switch (BPF_SIZE(fp->code)) {
    497 	case BPF_B:
    498 		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
    499 		break;
    500 	case BPF_H:
    501 		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
    502 		break;
    503 	case BPF_W:
    504 		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
    505 		break;
    506 	default:
    507 		return false;
    508 	}
    509 
    510 	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2);
    511 	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
    512 	*insn   = BPF_EXIT_INSN();
    513 
    514 	*insnp = insn;
    515 	return true;
    516 }
    517 
    518 /**
    519  *	bpf_convert_filter - convert filter program
    520  *	@prog: the user passed filter program
    521  *	@len: the length of the user passed filter program
    522  *	@new_prog: allocated 'struct bpf_prog' or NULL
    523  *	@new_len: pointer to store length of converted program
    524  *	@seen_ld_abs: bool whether we've seen ld_abs/ind
    525  *
    526  * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
    527  * style extended BPF (eBPF).
    528  * Conversion workflow:
    529  *
    530  * 1) First pass for calculating the new program length:
    531  *   bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
    532  *
    533  * 2) 2nd pass to remap in two passes: 1st pass finds new
    534  *    jump offsets, 2nd pass remapping:
    535  *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
    536  */
    537 static int bpf_convert_filter(struct sock_filter *prog, int len,
    538 			      struct bpf_prog *new_prog, int *new_len,
    539 			      bool *seen_ld_abs)
    540 {
    541 	int new_flen = 0, pass = 0, target, i, stack_off;
    542 	struct bpf_insn *new_insn, *first_insn = NULL;
    543 	struct sock_filter *fp;
    544 	int *addrs = NULL;
    545 	u8 bpf_src;
    546 
    547 	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
    548 	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
    549 
    550 	if (len <= 0 || len > BPF_MAXINSNS)
    551 		return -EINVAL;
    552 
    553 	if (new_prog) {
    554 		first_insn = new_prog->insnsi;
    555 		addrs = kcalloc(len, sizeof(*addrs),
    556 				GFP_KERNEL | __GFP_NOWARN);
    557 		if (!addrs)
    558 			return -ENOMEM;
    559 	}
    560 
    561 do_pass:
    562 	new_insn = first_insn;
    563 	fp = prog;
    564 
    565 	/* Classic BPF related prologue emission. */
    566 	if (new_prog) {
    567 		/* Classic BPF expects A and X to be reset first. These need
    568 		 * to be guaranteed to be the first two instructions.
    569 		 */
    570 		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
    571 		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
    572 
    573 		/* All programs must keep CTX in callee saved BPF_REG_CTX.
     574 		 * In the eBPF case it's done by the compiler; here we need to
     575 		 * do this ourselves. Initial CTX is present in BPF_REG_ARG1.
    576 		 */
    577 		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
    578 		if (*seen_ld_abs) {
    579 			/* For packet access in classic BPF, cache skb->data
    580 			 * in callee-saved BPF R8 and skb->len - skb->data_len
    581 			 * (headlen) in BPF R9. Since classic BPF is read-only
    582 			 * on CTX, we only need to cache it once.
    583 			 */
    584 			*new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
    585 						  BPF_REG_D, BPF_REG_CTX,
    586 						  offsetof(struct sk_buff, data));
    587 			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
    588 						  offsetof(struct sk_buff, len));
    589 			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
    590 						  offsetof(struct sk_buff, data_len));
    591 			*new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
    592 		}
    593 	} else {
    594 		new_insn += 3;
    595 	}
    596 
    597 	for (i = 0; i < len; fp++, i++) {
    598 		struct bpf_insn tmp_insns[32] = { };
    599 		struct bpf_insn *insn = tmp_insns;
    600 
    601 		if (addrs)
    602 			addrs[i] = new_insn - first_insn;
    603 
    604 		switch (fp->code) {
    605 		/* All arithmetic insns and skb loads map as-is. */
    606 		case BPF_ALU | BPF_ADD | BPF_X:
    607 		case BPF_ALU | BPF_ADD | BPF_K:
    608 		case BPF_ALU | BPF_SUB | BPF_X:
    609 		case BPF_ALU | BPF_SUB | BPF_K:
    610 		case BPF_ALU | BPF_AND | BPF_X:
    611 		case BPF_ALU | BPF_AND | BPF_K:
    612 		case BPF_ALU | BPF_OR | BPF_X:
    613 		case BPF_ALU | BPF_OR | BPF_K:
    614 		case BPF_ALU | BPF_LSH | BPF_X:
    615 		case BPF_ALU | BPF_LSH | BPF_K:
    616 		case BPF_ALU | BPF_RSH | BPF_X:
    617 		case BPF_ALU | BPF_RSH | BPF_K:
    618 		case BPF_ALU | BPF_XOR | BPF_X:
    619 		case BPF_ALU | BPF_XOR | BPF_K:
    620 		case BPF_ALU | BPF_MUL | BPF_X:
    621 		case BPF_ALU | BPF_MUL | BPF_K:
    622 		case BPF_ALU | BPF_DIV | BPF_X:
    623 		case BPF_ALU | BPF_DIV | BPF_K:
    624 		case BPF_ALU | BPF_MOD | BPF_X:
    625 		case BPF_ALU | BPF_MOD | BPF_K:
    626 		case BPF_ALU | BPF_NEG:
    627 		case BPF_LD | BPF_ABS | BPF_W:
    628 		case BPF_LD | BPF_ABS | BPF_H:
    629 		case BPF_LD | BPF_ABS | BPF_B:
    630 		case BPF_LD | BPF_IND | BPF_W:
    631 		case BPF_LD | BPF_IND | BPF_H:
    632 		case BPF_LD | BPF_IND | BPF_B:
    633 			/* Check for overloaded BPF extension and
    634 			 * directly convert it if found, otherwise
    635 			 * just move on with mapping.
    636 			 */
    637 			if (BPF_CLASS(fp->code) == BPF_LD &&
    638 			    BPF_MODE(fp->code) == BPF_ABS &&
    639 			    convert_bpf_extensions(fp, &insn))
    640 				break;
    641 			if (BPF_CLASS(fp->code) == BPF_LD &&
    642 			    convert_bpf_ld_abs(fp, &insn)) {
    643 				*seen_ld_abs = true;
    644 				break;
    645 			}
    646 
    647 			if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
    648 			    fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
    649 				*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
    650 				/* Error with exception code on div/mod by 0.
     651 				 * For cBPF programs, this always returned 0.
    652 				 */
    653 				*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
    654 				*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
    655 				*insn++ = BPF_EXIT_INSN();
    656 			}
    657 
    658 			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
    659 			break;
    660 
    661 		/* Jump transformation cannot use BPF block macros
    662 		 * everywhere as offset calculation and target updates
    663 		 * require a bit more work than the rest, i.e. jump
    664 		 * opcodes map as-is, but offsets need adjustment.
    665 		 */
    666 
    667 #define BPF_EMIT_JMP							\
    668 	do {								\
    669 		const s32 off_min = S16_MIN, off_max = S16_MAX;		\
    670 		s32 off;						\
    671 									\
    672 		if (target >= len || target < 0)			\
    673 			goto err;					\
    674 		off = addrs ? addrs[target] - addrs[i] - 1 : 0;		\
    675 		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
    676 		off -= insn - tmp_insns;				\
    677 		/* Reject anything not fitting into insn->off. */	\
    678 		if (off < off_min || off > off_max)			\
    679 			goto err;					\
    680 		insn->off = off;					\
    681 	} while (0)
    682 
    683 		case BPF_JMP | BPF_JA:
    684 			target = i + fp->k + 1;
    685 			insn->code = fp->code;
    686 			BPF_EMIT_JMP;
    687 			break;
    688 
    689 		case BPF_JMP | BPF_JEQ | BPF_K:
    690 		case BPF_JMP | BPF_JEQ | BPF_X:
    691 		case BPF_JMP | BPF_JSET | BPF_K:
    692 		case BPF_JMP | BPF_JSET | BPF_X:
    693 		case BPF_JMP | BPF_JGT | BPF_K:
    694 		case BPF_JMP | BPF_JGT | BPF_X:
    695 		case BPF_JMP | BPF_JGE | BPF_K:
    696 		case BPF_JMP | BPF_JGE | BPF_X:
    697 			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
    698 				/* BPF immediates are signed, zero extend
    699 				 * immediate into tmp register and use it
    700 				 * in compare insn.
    701 				 */
    702 				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
    703 
    704 				insn->dst_reg = BPF_REG_A;
    705 				insn->src_reg = BPF_REG_TMP;
    706 				bpf_src = BPF_X;
    707 			} else {
    708 				insn->dst_reg = BPF_REG_A;
    709 				insn->imm = fp->k;
    710 				bpf_src = BPF_SRC(fp->code);
    711 				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
    712 			}
    713 
    714 			/* Common case where 'jump_false' is next insn. */
    715 			if (fp->jf == 0) {
    716 				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
    717 				target = i + fp->jt + 1;
    718 				BPF_EMIT_JMP;
    719 				break;
    720 			}
    721 
    722 			/* Convert some jumps when 'jump_true' is next insn. */
    723 			if (fp->jt == 0) {
    724 				switch (BPF_OP(fp->code)) {
    725 				case BPF_JEQ:
    726 					insn->code = BPF_JMP | BPF_JNE | bpf_src;
    727 					break;
    728 				case BPF_JGT:
    729 					insn->code = BPF_JMP | BPF_JLE | bpf_src;
    730 					break;
    731 				case BPF_JGE:
    732 					insn->code = BPF_JMP | BPF_JLT | bpf_src;
    733 					break;
    734 				default:
    735 					goto jmp_rest;
    736 				}
    737 
    738 				target = i + fp->jf + 1;
    739 				BPF_EMIT_JMP;
    740 				break;
    741 			}
    742 jmp_rest:
    743 			/* Other jumps are mapped into two insns: Jxx and JA. */
    744 			target = i + fp->jt + 1;
    745 			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
    746 			BPF_EMIT_JMP;
    747 			insn++;
    748 
    749 			insn->code = BPF_JMP | BPF_JA;
    750 			target = i + fp->jf + 1;
    751 			BPF_EMIT_JMP;
    752 			break;
    753 
     754 		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
    755 		case BPF_LDX | BPF_MSH | BPF_B: {
    756 			struct sock_filter tmp = {
    757 				.code	= BPF_LD | BPF_ABS | BPF_B,
    758 				.k	= fp->k,
    759 			};
    760 
    761 			*seen_ld_abs = true;
    762 
    763 			/* X = A */
    764 			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
    765 			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
    766 			convert_bpf_ld_abs(&tmp, &insn);
    767 			insn++;
    768 			/* A &= 0xf */
    769 			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
    770 			/* A <<= 2 */
    771 			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
    772 			/* tmp = X */
    773 			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
    774 			/* X = A */
    775 			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
    776 			/* A = tmp */
    777 			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
    778 			break;
    779 		}
     780 		/* RET_K is remapped into 2 insns. RET_A case doesn't need an
    781 		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
    782 		 */
    783 		case BPF_RET | BPF_A:
    784 		case BPF_RET | BPF_K:
    785 			if (BPF_RVAL(fp->code) == BPF_K)
    786 				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
    787 							0, fp->k);
    788 			*insn = BPF_EXIT_INSN();
    789 			break;
    790 
    791 		/* Store to stack. */
    792 		case BPF_ST:
    793 		case BPF_STX:
    794 			stack_off = fp->k * 4  + 4;
    795 			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
    796 					    BPF_ST ? BPF_REG_A : BPF_REG_X,
    797 					    -stack_off);
    798 			/* check_load_and_stores() verifies that classic BPF can
    799 			 * load from stack only after write, so tracking
    800 			 * stack_depth for ST|STX insns is enough
    801 			 */
    802 			if (new_prog && new_prog->aux->stack_depth < stack_off)
    803 				new_prog->aux->stack_depth = stack_off;
    804 			break;
    805 
    806 		/* Load from stack. */
    807 		case BPF_LD | BPF_MEM:
    808 		case BPF_LDX | BPF_MEM:
    809 			stack_off = fp->k * 4  + 4;
    810 			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD  ?
    811 					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
    812 					    -stack_off);
    813 			break;
    814 
    815 		/* A = K or X = K */
    816 		case BPF_LD | BPF_IMM:
    817 		case BPF_LDX | BPF_IMM:
    818 			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
    819 					      BPF_REG_A : BPF_REG_X, fp->k);
    820 			break;
    821 
    822 		/* X = A */
    823 		case BPF_MISC | BPF_TAX:
    824 			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
    825 			break;
    826 
    827 		/* A = X */
    828 		case BPF_MISC | BPF_TXA:
    829 			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
    830 			break;
    831 
    832 		/* A = skb->len or X = skb->len */
    833 		case BPF_LD | BPF_W | BPF_LEN:
    834 		case BPF_LDX | BPF_W | BPF_LEN:
    835 			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
    836 					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
    837 					    offsetof(struct sk_buff, len));
    838 			break;
    839 
    840 		/* Access seccomp_data fields. */
    841 		case BPF_LDX | BPF_ABS | BPF_W:
    842 			/* A = *(u32 *) (ctx + K) */
    843 			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
    844 			break;
    845 
    846 		/* Unknown instruction. */
    847 		default:
    848 			goto err;
    849 		}
    850 
    851 		insn++;
    852 		if (new_prog)
    853 			memcpy(new_insn, tmp_insns,
    854 			       sizeof(*insn) * (insn - tmp_insns));
    855 		new_insn += insn - tmp_insns;
    856 	}
    857 
    858 	if (!new_prog) {
    859 		/* Only calculating new length. */
    860 		*new_len = new_insn - first_insn;
    861 		if (*seen_ld_abs)
    862 			*new_len += 4; /* Prologue bits. */
    863 		return 0;
    864 	}
    865 
    866 	pass++;
    867 	if (new_flen != new_insn - first_insn) {
    868 		new_flen = new_insn - first_insn;
    869 		if (pass > 2)
    870 			goto err;
    871 		goto do_pass;
    872 	}
    873 
    874 	kfree(addrs);
    875 	BUG_ON(*new_len != new_flen);
    876 	return 0;
    877 err:
    878 	kfree(addrs);
    879 	return -EINVAL;
    880 }
    881 
    882 /* Security:
    883  *
     884  * As we don't want to clear the mem[] array for each packet going through
     885  * __bpf_prog_run(), we check that a filter loaded by the user never tries to
     886  * read a cell that was not previously written, and we check all branches to
     887  * be sure a malicious user doesn't try to abuse us.
    888  */
    889 static int check_load_and_stores(const struct sock_filter *filter, int flen)
    890 {
    891 	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
    892 	int pc, ret = 0;
    893 
    894 	BUILD_BUG_ON(BPF_MEMWORDS > 16);
    895 
    896 	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
    897 	if (!masks)
    898 		return -ENOMEM;
    899 
    900 	memset(masks, 0xff, flen * sizeof(*masks));
    901 
    902 	for (pc = 0; pc < flen; pc++) {
    903 		memvalid &= masks[pc];
    904 
    905 		switch (filter[pc].code) {
    906 		case BPF_ST:
    907 		case BPF_STX:
    908 			memvalid |= (1 << filter[pc].k);
    909 			break;
    910 		case BPF_LD | BPF_MEM:
    911 		case BPF_LDX | BPF_MEM:
    912 			if (!(memvalid & (1 << filter[pc].k))) {
    913 				ret = -EINVAL;
    914 				goto error;
    915 			}
    916 			break;
    917 		case BPF_JMP | BPF_JA:
    918 			/* A jump must set masks on target */
    919 			masks[pc + 1 + filter[pc].k] &= memvalid;
    920 			memvalid = ~0;
    921 			break;
    922 		case BPF_JMP | BPF_JEQ | BPF_K:
    923 		case BPF_JMP | BPF_JEQ | BPF_X:
    924 		case BPF_JMP | BPF_JGE | BPF_K:
    925 		case BPF_JMP | BPF_JGE | BPF_X:
    926 		case BPF_JMP | BPF_JGT | BPF_K:
    927 		case BPF_JMP | BPF_JGT | BPF_X:
    928 		case BPF_JMP | BPF_JSET | BPF_K:
    929 		case BPF_JMP | BPF_JSET | BPF_X:
    930 			/* A jump must set masks on targets */
    931 			masks[pc + 1 + filter[pc].jt] &= memvalid;
    932 			masks[pc + 1 + filter[pc].jf] &= memvalid;
    933 			memvalid = ~0;
    934 			break;
    935 		}
    936 	}
    937 error:
    938 	kfree(masks);
    939 	return ret;
    940 }
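        /* Worked example (illustrative, not part of the original source): the
         * check above rejects a classic program that reads a scratch cell
         * before any path has written it. Using the uapi BPF_STMT() macro from
         * <linux/filter.h>:
         *
         *	Rejected - M[0] is loaded without a prior store:
         *		BPF_STMT(BPF_LD | BPF_MEM, 0),
         *		BPF_STMT(BPF_RET | BPF_A, 0),
         *
         *	Accepted - M[0] is written before it is read:
         *		BPF_STMT(BPF_LD | BPF_W | BPF_LEN, 0),
         *		BPF_STMT(BPF_ST, 0),
         *		BPF_STMT(BPF_LD | BPF_MEM, 0),
         *		BPF_STMT(BPF_RET | BPF_A, 0),
         */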
    941 
    942 static bool chk_code_allowed(u16 code_to_probe)
    943 {
    944 	static const bool codes[] = {
    945 		/* 32 bit ALU operations */
    946 		[BPF_ALU | BPF_ADD | BPF_K] = true,
    947 		[BPF_ALU | BPF_ADD | BPF_X] = true,
    948 		[BPF_ALU | BPF_SUB | BPF_K] = true,
    949 		[BPF_ALU | BPF_SUB | BPF_X] = true,
    950 		[BPF_ALU | BPF_MUL | BPF_K] = true,
    951 		[BPF_ALU | BPF_MUL | BPF_X] = true,
    952 		[BPF_ALU | BPF_DIV | BPF_K] = true,
    953 		[BPF_ALU | BPF_DIV | BPF_X] = true,
    954 		[BPF_ALU | BPF_MOD | BPF_K] = true,
    955 		[BPF_ALU | BPF_MOD | BPF_X] = true,
    956 		[BPF_ALU | BPF_AND | BPF_K] = true,
    957 		[BPF_ALU | BPF_AND | BPF_X] = true,
    958 		[BPF_ALU | BPF_OR | BPF_K] = true,
    959 		[BPF_ALU | BPF_OR | BPF_X] = true,
    960 		[BPF_ALU | BPF_XOR | BPF_K] = true,
    961 		[BPF_ALU | BPF_XOR | BPF_X] = true,
    962 		[BPF_ALU | BPF_LSH | BPF_K] = true,
    963 		[BPF_ALU | BPF_LSH | BPF_X] = true,
    964 		[BPF_ALU | BPF_RSH | BPF_K] = true,
    965 		[BPF_ALU | BPF_RSH | BPF_X] = true,
    966 		[BPF_ALU | BPF_NEG] = true,
    967 		/* Load instructions */
    968 		[BPF_LD | BPF_W | BPF_ABS] = true,
    969 		[BPF_LD | BPF_H | BPF_ABS] = true,
    970 		[BPF_LD | BPF_B | BPF_ABS] = true,
    971 		[BPF_LD | BPF_W | BPF_LEN] = true,
    972 		[BPF_LD | BPF_W | BPF_IND] = true,
    973 		[BPF_LD | BPF_H | BPF_IND] = true,
    974 		[BPF_LD | BPF_B | BPF_IND] = true,
    975 		[BPF_LD | BPF_IMM] = true,
    976 		[BPF_LD | BPF_MEM] = true,
    977 		[BPF_LDX | BPF_W | BPF_LEN] = true,
    978 		[BPF_LDX | BPF_B | BPF_MSH] = true,
    979 		[BPF_LDX | BPF_IMM] = true,
    980 		[BPF_LDX | BPF_MEM] = true,
    981 		/* Store instructions */
    982 		[BPF_ST] = true,
    983 		[BPF_STX] = true,
    984 		/* Misc instructions */
    985 		[BPF_MISC | BPF_TAX] = true,
    986 		[BPF_MISC | BPF_TXA] = true,
    987 		/* Return instructions */
    988 		[BPF_RET | BPF_K] = true,
    989 		[BPF_RET | BPF_A] = true,
    990 		/* Jump instructions */
    991 		[BPF_JMP | BPF_JA] = true,
    992 		[BPF_JMP | BPF_JEQ | BPF_K] = true,
    993 		[BPF_JMP | BPF_JEQ | BPF_X] = true,
    994 		[BPF_JMP | BPF_JGE | BPF_K] = true,
    995 		[BPF_JMP | BPF_JGE | BPF_X] = true,
    996 		[BPF_JMP | BPF_JGT | BPF_K] = true,
    997 		[BPF_JMP | BPF_JGT | BPF_X] = true,
    998 		[BPF_JMP | BPF_JSET | BPF_K] = true,
    999 		[BPF_JMP | BPF_JSET | BPF_X] = true,
   1000 	};
   1001 
   1002 	if (code_to_probe >= ARRAY_SIZE(codes))
   1003 		return false;
   1004 
   1005 	return codes[code_to_probe];
   1006 }
   1007 
   1008 static bool bpf_check_basics_ok(const struct sock_filter *filter,
   1009 				unsigned int flen)
   1010 {
   1011 	if (filter == NULL)
   1012 		return false;
   1013 	if (flen == 0 || flen > BPF_MAXINSNS)
   1014 		return false;
   1015 
   1016 	return true;
   1017 }
   1018 
   1019 /**
   1020  *	bpf_check_classic - verify socket filter code
   1021  *	@filter: filter to verify
   1022  *	@flen: length of filter
   1023  *
   1024  * Check the user's filter code. If we let some ugly
    1025  * filter code slip through, kaboom! The filter must contain
   1026  * no references or jumps that are out of range, no illegal
   1027  * instructions, and must end with a RET instruction.
   1028  *
   1029  * All jumps are forward as they are not signed.
   1030  *
   1031  * Returns 0 if the rule set is legal or -EINVAL if not.
   1032  */
   1033 static int bpf_check_classic(const struct sock_filter *filter,
   1034 			     unsigned int flen)
   1035 {
   1036 	bool anc_found;
   1037 	int pc;
   1038 
   1039 	/* Check the filter code now */
   1040 	for (pc = 0; pc < flen; pc++) {
   1041 		const struct sock_filter *ftest = &filter[pc];
   1042 
   1043 		/* May we actually operate on this code? */
   1044 		if (!chk_code_allowed(ftest->code))
   1045 			return -EINVAL;
   1046 
   1047 		/* Some instructions need special checks */
   1048 		switch (ftest->code) {
   1049 		case BPF_ALU | BPF_DIV | BPF_K:
   1050 		case BPF_ALU | BPF_MOD | BPF_K:
   1051 			/* Check for division by zero */
   1052 			if (ftest->k == 0)
   1053 				return -EINVAL;
   1054 			break;
   1055 		case BPF_ALU | BPF_LSH | BPF_K:
   1056 		case BPF_ALU | BPF_RSH | BPF_K:
   1057 			if (ftest->k >= 32)
   1058 				return -EINVAL;
   1059 			break;
   1060 		case BPF_LD | BPF_MEM:
   1061 		case BPF_LDX | BPF_MEM:
   1062 		case BPF_ST:
   1063 		case BPF_STX:
   1064 			/* Check for invalid memory addresses */
   1065 			if (ftest->k >= BPF_MEMWORDS)
   1066 				return -EINVAL;
   1067 			break;
   1068 		case BPF_JMP | BPF_JA:
   1069 			/* Note, the large ftest->k might cause loops.
   1070 			 * Compare this with conditional jumps below,
   1071 			 * where offsets are limited. --ANK (981016)
   1072 			 */
   1073 			if (ftest->k >= (unsigned int)(flen - pc - 1))
   1074 				return -EINVAL;
   1075 			break;
   1076 		case BPF_JMP | BPF_JEQ | BPF_K:
   1077 		case BPF_JMP | BPF_JEQ | BPF_X:
   1078 		case BPF_JMP | BPF_JGE | BPF_K:
   1079 		case BPF_JMP | BPF_JGE | BPF_X:
   1080 		case BPF_JMP | BPF_JGT | BPF_K:
   1081 		case BPF_JMP | BPF_JGT | BPF_X:
   1082 		case BPF_JMP | BPF_JSET | BPF_K:
   1083 		case BPF_JMP | BPF_JSET | BPF_X:
   1084 			/* Both conditionals must be safe */
   1085 			if (pc + ftest->jt + 1 >= flen ||
   1086 			    pc + ftest->jf + 1 >= flen)
   1087 				return -EINVAL;
   1088 			break;
   1089 		case BPF_LD | BPF_W | BPF_ABS:
   1090 		case BPF_LD | BPF_H | BPF_ABS:
   1091 		case BPF_LD | BPF_B | BPF_ABS:
   1092 			anc_found = false;
   1093 			if (bpf_anc_helper(ftest) & BPF_ANC)
   1094 				anc_found = true;
   1095 			/* Ancillary operation unknown or unsupported */
   1096 			if (anc_found == false && ftest->k >= SKF_AD_OFF)
   1097 				return -EINVAL;
   1098 		}
   1099 	}
   1100 
   1101 	/* Last instruction must be a RET code */
   1102 	switch (filter[flen - 1].code) {
   1103 	case BPF_RET | BPF_K:
   1104 	case BPF_RET | BPF_A:
   1105 		return check_load_and_stores(filter, flen);
   1106 	}
   1107 
   1108 	return -EINVAL;
   1109 }
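        /* Illustrative note (not part of the original file): the smallest
         * program that passes these checks is a single accept-all return,
         *
         *	BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
         *
         * while a filter whose last instruction is not a RET, or whose jump
         * targets fall past the end of the program, is rejected with -EINVAL
         * before it ever reaches the eBPF conversion below.
         */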
   1110 
   1111 static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
   1112 				      const struct sock_fprog *fprog)
   1113 {
   1114 	unsigned int fsize = bpf_classic_proglen(fprog);
   1115 	struct sock_fprog_kern *fkprog;
   1116 
   1117 	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
   1118 	if (!fp->orig_prog)
   1119 		return -ENOMEM;
   1120 
   1121 	fkprog = fp->orig_prog;
   1122 	fkprog->len = fprog->len;
   1123 
   1124 	fkprog->filter = kmemdup(fp->insns, fsize,
   1125 				 GFP_KERNEL | __GFP_NOWARN);
   1126 	if (!fkprog->filter) {
   1127 		kfree(fp->orig_prog);
   1128 		return -ENOMEM;
   1129 	}
   1130 
   1131 	return 0;
   1132 }
   1133 
   1134 static void bpf_release_orig_filter(struct bpf_prog *fp)
   1135 {
   1136 	struct sock_fprog_kern *fprog = fp->orig_prog;
   1137 
   1138 	if (fprog) {
   1139 		kfree(fprog->filter);
   1140 		kfree(fprog);
   1141 	}
   1142 }
   1143 
   1144 static void __bpf_prog_release(struct bpf_prog *prog)
   1145 {
   1146 	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
   1147 		bpf_prog_put(prog);
   1148 	} else {
   1149 		bpf_release_orig_filter(prog);
   1150 		bpf_prog_free(prog);
   1151 	}
   1152 }
   1153 
   1154 static void __sk_filter_release(struct sk_filter *fp)
   1155 {
   1156 	__bpf_prog_release(fp->prog);
   1157 	kfree(fp);
   1158 }
   1159 
   1160 /**
   1161  * 	sk_filter_release_rcu - Release a socket filter by rcu_head
   1162  *	@rcu: rcu_head that contains the sk_filter to free
   1163  */
   1164 static void sk_filter_release_rcu(struct rcu_head *rcu)
   1165 {
   1166 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
   1167 
   1168 	__sk_filter_release(fp);
   1169 }
   1170 
   1171 /**
   1172  *	sk_filter_release - release a socket filter
   1173  *	@fp: filter to remove
   1174  *
   1175  *	Remove a filter from a socket and release its resources.
   1176  */
   1177 static void sk_filter_release(struct sk_filter *fp)
   1178 {
   1179 	if (refcount_dec_and_test(&fp->refcnt))
   1180 		call_rcu(&fp->rcu, sk_filter_release_rcu);
   1181 }
   1182 
   1183 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
   1184 {
   1185 	u32 filter_size = bpf_prog_size(fp->prog->len);
   1186 
   1187 	atomic_sub(filter_size, &sk->sk_omem_alloc);
   1188 	sk_filter_release(fp);
   1189 }
   1190 
   1191 /* try to charge the socket memory if there is space available
   1192  * return true on success
   1193  */
   1194 static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
   1195 {
   1196 	u32 filter_size = bpf_prog_size(fp->prog->len);
   1197 
   1198 	/* same check as in sock_kmalloc() */
   1199 	if (filter_size <= sysctl_optmem_max &&
   1200 	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
   1201 		atomic_add(filter_size, &sk->sk_omem_alloc);
   1202 		return true;
   1203 	}
   1204 	return false;
   1205 }
   1206 
   1207 bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
   1208 {
   1209 	if (!refcount_inc_not_zero(&fp->refcnt))
   1210 		return false;
   1211 
   1212 	if (!__sk_filter_charge(sk, fp)) {
   1213 		sk_filter_release(fp);
   1214 		return false;
   1215 	}
   1216 	return true;
   1217 }
   1218 
   1219 static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
   1220 {
   1221 	struct sock_filter *old_prog;
   1222 	struct bpf_prog *old_fp;
   1223 	int err, new_len, old_len = fp->len;
   1224 	bool seen_ld_abs = false;
   1225 
   1226 	/* We are free to overwrite insns et al right here as it
   1227 	 * won't be used at this point in time anymore internally
   1228 	 * after the migration to the internal BPF instruction
   1229 	 * representation.
   1230 	 */
   1231 	BUILD_BUG_ON(sizeof(struct sock_filter) !=
   1232 		     sizeof(struct bpf_insn));
   1233 
   1234 	/* Conversion cannot happen on overlapping memory areas,
   1235 	 * so we need to keep the user BPF around until the 2nd
   1236 	 * pass. At this time, the user BPF is stored in fp->insns.
   1237 	 */
   1238 	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
   1239 			   GFP_KERNEL | __GFP_NOWARN);
   1240 	if (!old_prog) {
   1241 		err = -ENOMEM;
   1242 		goto out_err;
   1243 	}
   1244 
   1245 	/* 1st pass: calculate the new program length. */
   1246 	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len,
   1247 				 &seen_ld_abs);
   1248 	if (err)
   1249 		goto out_err_free;
   1250 
   1251 	/* Expand fp for appending the new filter representation. */
   1252 	old_fp = fp;
   1253 	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
   1254 	if (!fp) {
   1255 		/* The old_fp is still around in case we couldn't
   1256 		 * allocate new memory, so uncharge on that one.
   1257 		 */
   1258 		fp = old_fp;
   1259 		err = -ENOMEM;
   1260 		goto out_err_free;
   1261 	}
   1262 
   1263 	fp->len = new_len;
   1264 
   1265 	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
   1266 	err = bpf_convert_filter(old_prog, old_len, fp, &new_len,
   1267 				 &seen_ld_abs);
   1268 	if (err)
   1269 		/* 2nd bpf_convert_filter() can fail only if it fails
    1270 		 * to allocate memory; remapping must succeed. Note
    1271 		 * that at this time old_fp has already been released
   1272 		 * by krealloc().
   1273 		 */
   1274 		goto out_err_free;
   1275 
   1276 	fp = bpf_prog_select_runtime(fp, &err);
   1277 	if (err)
   1278 		goto out_err_free;
   1279 
   1280 	kfree(old_prog);
   1281 	return fp;
   1282 
   1283 out_err_free:
   1284 	kfree(old_prog);
   1285 out_err:
   1286 	__bpf_prog_release(fp);
   1287 	return ERR_PTR(err);
   1288 }
   1289 
   1290 static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
   1291 					   bpf_aux_classic_check_t trans)
   1292 {
   1293 	int err;
   1294 
   1295 	fp->bpf_func = NULL;
   1296 	fp->jited = 0;
   1297 
   1298 	err = bpf_check_classic(fp->insns, fp->len);
   1299 	if (err) {
   1300 		__bpf_prog_release(fp);
   1301 		return ERR_PTR(err);
   1302 	}
   1303 
   1304 	/* There might be additional checks and transformations
    1305 	 * needed on classic filters, e.g. in the case of seccomp.
   1306 	 */
   1307 	if (trans) {
   1308 		err = trans(fp->insns, fp->len);
   1309 		if (err) {
   1310 			__bpf_prog_release(fp);
   1311 			return ERR_PTR(err);
   1312 		}
   1313 	}
   1314 
   1315 	/* Probe if we can JIT compile the filter and if so, do
   1316 	 * the compilation of the filter.
   1317 	 */
   1318 	bpf_jit_compile(fp);
   1319 
   1320 	/* JIT compiler couldn't process this filter, so do the
   1321 	 * internal BPF translation for the optimized interpreter.
   1322 	 */
   1323 	if (!fp->jited)
   1324 		fp = bpf_migrate_filter(fp);
   1325 
   1326 	return fp;
   1327 }
   1328 
   1329 /**
   1330  *	bpf_prog_create - create an unattached filter
   1331  *	@pfp: the unattached filter that is created
   1332  *	@fprog: the filter program
   1333  *
   1334  * Create a filter independent of any socket. We first run some
   1335  * sanity checks on it to make sure it does not explode on us later.
   1336  * If an error occurs or there is insufficient memory for the filter
   1337  * a negative errno code is returned. On success the return is zero.
   1338  */
   1339 int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
   1340 {
   1341 	unsigned int fsize = bpf_classic_proglen(fprog);
   1342 	struct bpf_prog *fp;
   1343 
   1344 	/* Make sure new filter is there and in the right amounts. */
   1345 	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
   1346 		return -EINVAL;
   1347 
   1348 	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
   1349 	if (!fp)
   1350 		return -ENOMEM;
   1351 
   1352 	memcpy(fp->insns, fprog->filter, fsize);
   1353 
   1354 	fp->len = fprog->len;
   1355 	/* Since unattached filters are not copied back to user
   1356 	 * space through sk_get_filter(), we do not need to hold
   1357 	 * a copy here, and can spare us the work.
   1358 	 */
   1359 	fp->orig_prog = NULL;
   1360 
   1361 	/* bpf_prepare_filter() already takes care of freeing
   1362 	 * memory in case something goes wrong.
   1363 	 */
   1364 	fp = bpf_prepare_filter(fp, NULL);
   1365 	if (IS_ERR(fp))
   1366 		return PTR_ERR(fp);
   1367 
   1368 	*pfp = fp;
   1369 	return 0;
   1370 }
   1371 EXPORT_SYMBOL_GPL(bpf_prog_create);
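        /* Illustrative in-kernel usage sketch (not part of the original file,
         * caller names are hypothetical): build a classic accept-all program,
         * convert it with bpf_prog_create(), run it against an skb and release
         * it again:
         *
         *	static struct sock_filter accept_all[] = {
         *		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
         *	};
         *	struct sock_fprog_kern fprog = {
         *		.len	= ARRAY_SIZE(accept_all),
         *		.filter	= accept_all,
         *	};
         *	struct bpf_prog *prog;
         *	u32 res;
         *
         *	if (bpf_prog_create(&prog, &fprog))
         *		return;
         *	res = BPF_PROG_RUN(prog, skb);
         *	bpf_prog_destroy(prog);
         */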
   1372 
   1373 /**
   1374  *	bpf_prog_create_from_user - create an unattached filter from user buffer
   1375  *	@pfp: the unattached filter that is created
   1376  *	@fprog: the filter program
   1377  *	@trans: post-classic verifier transformation handler
   1378  *	@save_orig: save classic BPF program
   1379  *
   1380  * This function effectively does the same as bpf_prog_create(), only
    1381  * that it builds up its insns buffer from a user space provided buffer.
   1382  * It also allows for passing a bpf_aux_classic_check_t handler.
   1383  */
   1384 int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
   1385 			      bpf_aux_classic_check_t trans, bool save_orig)
   1386 {
   1387 	unsigned int fsize = bpf_classic_proglen(fprog);
   1388 	struct bpf_prog *fp;
   1389 	int err;
   1390 
   1391 	/* Make sure new filter is there and in the right amounts. */
   1392 	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
   1393 		return -EINVAL;
   1394 
   1395 	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
   1396 	if (!fp)
   1397 		return -ENOMEM;
   1398 
   1399 	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
   1400 		__bpf_prog_free(fp);
   1401 		return -EFAULT;
   1402 	}
   1403 
   1404 	fp->len = fprog->len;
   1405 	fp->orig_prog = NULL;
   1406 
   1407 	if (save_orig) {
   1408 		err = bpf_prog_store_orig_filter(fp, fprog);
   1409 		if (err) {
   1410 			__bpf_prog_free(fp);
   1411 			return -ENOMEM;
   1412 		}
   1413 	}
   1414 
   1415 	/* bpf_prepare_filter() already takes care of freeing
   1416 	 * memory in case something goes wrong.
   1417 	 */
   1418 	fp = bpf_prepare_filter(fp, trans);
   1419 	if (IS_ERR(fp))
   1420 		return PTR_ERR(fp);
   1421 
   1422 	*pfp = fp;
   1423 	return 0;
   1424 }
   1425 EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
   1426 
   1427 void bpf_prog_destroy(struct bpf_prog *fp)
   1428 {
   1429 	__bpf_prog_release(fp);
   1430 }
   1431 EXPORT_SYMBOL_GPL(bpf_prog_destroy);
   1432 
   1433 static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
   1434 {
   1435 	struct sk_filter *fp, *old_fp;
   1436 
   1437 	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
   1438 	if (!fp)
   1439 		return -ENOMEM;
   1440 
   1441 	fp->prog = prog;
   1442 
   1443 	if (!__sk_filter_charge(sk, fp)) {
   1444 		kfree(fp);
   1445 		return -ENOMEM;
   1446 	}
   1447 	refcount_set(&fp->refcnt, 1);
   1448 
   1449 	old_fp = rcu_dereference_protected(sk->sk_filter,
   1450 					   lockdep_sock_is_held(sk));
   1451 	rcu_assign_pointer(sk->sk_filter, fp);
   1452 
   1453 	if (old_fp)
   1454 		sk_filter_uncharge(sk, old_fp);
   1455 
   1456 	return 0;
   1457 }
   1458 
   1459 static
   1460 struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
   1461 {
   1462 	unsigned int fsize = bpf_classic_proglen(fprog);
   1463 	struct bpf_prog *prog;
   1464 	int err;
   1465 
   1466 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
   1467 		return ERR_PTR(-EPERM);
   1468 
   1469 	/* Make sure new filter is there and in the right amounts. */
   1470 	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
   1471 		return ERR_PTR(-EINVAL);
   1472 
   1473 	prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
   1474 	if (!prog)
   1475 		return ERR_PTR(-ENOMEM);
   1476 
   1477 	if (copy_from_user(prog->insns, fprog->filter, fsize)) {
   1478 		__bpf_prog_free(prog);
   1479 		return ERR_PTR(-EFAULT);
   1480 	}
   1481 
   1482 	prog->len = fprog->len;
   1483 
   1484 	err = bpf_prog_store_orig_filter(prog, fprog);
   1485 	if (err) {
   1486 		__bpf_prog_free(prog);
   1487 		return ERR_PTR(-ENOMEM);
   1488 	}
   1489 
   1490 	/* bpf_prepare_filter() already takes care of freeing
   1491 	 * memory in case something goes wrong.
   1492 	 */
   1493 	return bpf_prepare_filter(prog, NULL);
   1494 }
   1495 
   1496 /**
   1497  *	sk_attach_filter - attach a socket filter
   1498  *	@fprog: the filter program
   1499  *	@sk: the socket to use
   1500  *
   1501  * Attach the user's filter code. We first run some sanity checks on
   1502  * it to make sure it does not explode on us later. If an error
   1503  * occurs or there is insufficient memory for the filter a negative
   1504  * errno code is returned. On success the return is zero.
   1505  */
   1506 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
   1507 {
   1508 	struct bpf_prog *prog = __get_filter(fprog, sk);
   1509 	int err;
   1510 
   1511 	if (IS_ERR(prog))
   1512 		return PTR_ERR(prog);
   1513 
   1514 	err = __sk_attach_prog(prog, sk);
   1515 	if (err < 0) {
   1516 		__bpf_prog_release(prog);
   1517 		return err;
   1518 	}
   1519 
   1520 	return 0;
   1521 }
   1522 EXPORT_SYMBOL_GPL(sk_attach_filter);
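        /* Userspace counterpart (illustrative sketch, not part of this file):
         * setsockopt(SO_ATTACH_FILTER) is what ends up calling
         * sk_attach_filter(). A minimal classic filter that accepts every
         * packet but truncates it to 96 bytes can be attached like this,
         * error handling omitted:
         *
         *	#include <linux/filter.h>
         *	#include <sys/socket.h>
         *
         *	struct sock_filter code[] = {
         *		BPF_STMT(BPF_RET | BPF_K, 96),
         *	};
         *	struct sock_fprog fprog = {
         *		.len	= sizeof(code) / sizeof(code[0]),
         *		.filter	= code,
         *	};
         *
         *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
         *		   &fprog, sizeof(fprog));
         */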
   1523 
   1524 int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
   1525 {
   1526 	struct bpf_prog *prog = __get_filter(fprog, sk);
   1527 	int err;
   1528 
   1529 	if (IS_ERR(prog))
   1530 		return PTR_ERR(prog);
   1531 
   1532 	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
   1533 		err = -ENOMEM;
   1534 	else
   1535 		err = reuseport_attach_prog(sk, prog);
   1536 
   1537 	if (err)
   1538 		__bpf_prog_release(prog);
   1539 
   1540 	return err;
   1541 }
   1542 
   1543 static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
   1544 {
   1545 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
   1546 		return ERR_PTR(-EPERM);
   1547 
   1548 	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
   1549 }
   1550 
   1551 int sk_attach_bpf(u32 ufd, struct sock *sk)
   1552 {
   1553 	struct bpf_prog *prog = __get_bpf(ufd, sk);
   1554 	int err;
   1555 
   1556 	if (IS_ERR(prog))
   1557 		return PTR_ERR(prog);
   1558 
   1559 	err = __sk_attach_prog(prog, sk);
   1560 	if (err < 0) {
   1561 		bpf_prog_put(prog);
   1562 		return err;
   1563 	}
   1564 
   1565 	return 0;
   1566 }
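        /* Illustrative sketch (not part of the original file): unlike
         * SO_ATTACH_FILTER, SO_ATTACH_BPF takes the file descriptor of an
         * already loaded BPF_PROG_TYPE_SOCKET_FILTER program (obtained from a
         * prior bpf(BPF_PROG_LOAD, ...) or through libbpf) rather than an
         * instruction array:
         *
         *	int prog_fd = ...;	(fd from BPF_PROG_LOAD, elided here)
         *
         *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_BPF,
         *		   &prog_fd, sizeof(prog_fd));
         */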
   1567 
   1568 int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
   1569 {
   1570 	struct bpf_prog *prog;
   1571 	int err;
   1572 
   1573 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
   1574 		return -EPERM;
   1575 
   1576 	prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
   1577 	if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
   1578 		prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
   1579 	if (IS_ERR(prog))
   1580 		return PTR_ERR(prog);
   1581 
   1582 	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
    1583 		/* Like other non-BPF_PROG_TYPE_SOCKET_FILTER
    1584 		 * bpf progs (e.g. sockmap), it depends on the
   1585 		 * limitation imposed by bpf_prog_load().
   1586 		 * Hence, sysctl_optmem_max is not checked.
   1587 		 */
   1588 		if ((sk->sk_type != SOCK_STREAM &&
   1589 		     sk->sk_type != SOCK_DGRAM) ||
   1590 		    (sk->sk_protocol != IPPROTO_UDP &&
   1591 		     sk->sk_protocol != IPPROTO_TCP) ||
   1592 		    (sk->sk_family != AF_INET &&
   1593 		     sk->sk_family != AF_INET6)) {
   1594 			err = -ENOTSUPP;
   1595 			goto err_prog_put;
   1596 		}
   1597 	} else {
   1598 		/* BPF_PROG_TYPE_SOCKET_FILTER */
   1599 		if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
   1600 			err = -ENOMEM;
   1601 			goto err_prog_put;
   1602 		}
   1603 	}
   1604 
   1605 	err = reuseport_attach_prog(sk, prog);
   1606 err_prog_put:
   1607 	if (err)
   1608 		bpf_prog_put(prog);
   1609 
   1610 	return err;
   1611 }
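        /* Illustrative sketch (not part of the original file): the two
         * reuseport attach paths above are reached from userspace via
         * SO_ATTACH_REUSEPORT_CBPF (classic instruction array) and
         * SO_ATTACH_REUSEPORT_EBPF (fd of a loaded program), e.g.:
         *
         *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
         *		   &prog_fd, sizeof(prog_fd));
         */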
   1612 
   1613 void sk_reuseport_prog_free(struct bpf_prog *prog)
   1614 {
   1615 	if (!prog)
   1616 		return;
   1617 
   1618 	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
   1619 		bpf_prog_put(prog);
   1620 	else
   1621 		bpf_prog_destroy(prog);
   1622 }
   1623 
   1624 struct bpf_scratchpad {
   1625 	union {
   1626 		__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
   1627 		u8     buff[MAX_BPF_STACK];
   1628 	};
   1629 };
   1630 
   1631 static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
   1632 
   1633 static inline int __bpf_try_make_writable(struct sk_buff *skb,
   1634 					  unsigned int write_len)
   1635 {
   1636 	return skb_ensure_writable(skb, write_len);
   1637 }
   1638 
   1639 static inline int bpf_try_make_writable(struct sk_buff *skb,
   1640 					unsigned int write_len)
   1641 {
   1642 	int err = __bpf_try_make_writable(skb, write_len);
   1643 
   1644 	bpf_compute_data_pointers(skb);
   1645 	return err;
   1646 }
   1647 
   1648 static int bpf_try_make_head_writable(struct sk_buff *skb)
   1649 {
   1650 	return bpf_try_make_writable(skb, skb_headlen(skb));
   1651 }
   1652 
   1653 static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
   1654 {
   1655 	if (skb_at_tc_ingress(skb))
   1656 		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
   1657 }
   1658 
   1659 static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
   1660 {
   1661 	if (skb_at_tc_ingress(skb))
   1662 		skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
   1663 }
   1664 
   1665 BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
   1666 	   const void *, from, u32, len, u64, flags)
   1667 {
   1668 	void *ptr;
   1669 
   1670 	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
   1671 		return -EINVAL;
   1672 	if (unlikely(offset > 0xffff))
   1673 		return -EFAULT;
   1674 	if (unlikely(bpf_try_make_writable(skb, offset + len)))
   1675 		return -EFAULT;
   1676 
   1677 	ptr = skb->data + offset;
   1678 	if (flags & BPF_F_RECOMPUTE_CSUM)
   1679 		__skb_postpull_rcsum(skb, ptr, len, offset);
   1680 
   1681 	memcpy(ptr, from, len);
   1682 
   1683 	if (flags & BPF_F_RECOMPUTE_CSUM)
   1684 		__skb_postpush_rcsum(skb, ptr, len, offset);
   1685 	if (flags & BPF_F_INVALIDATE_HASH)
   1686 		skb_clear_hash(skb);
   1687 
   1688 	return 0;
   1689 }
   1690 
   1691 static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
   1692 	.func		= bpf_skb_store_bytes,
   1693 	.gpl_only	= false,
   1694 	.ret_type	= RET_INTEGER,
   1695 	.arg1_type	= ARG_PTR_TO_CTX,
   1696 	.arg2_type	= ARG_ANYTHING,
   1697 	.arg3_type	= ARG_PTR_TO_MEM,
   1698 	.arg4_type	= ARG_CONST_SIZE,
   1699 	.arg5_type	= ARG_ANYTHING,
   1700 };
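
/* A minimal usage sketch of bpf_skb_store_bytes() as seen from a tc BPF-C
 * program; illustration only, the dmac value is made up and the usual
 * uapi/helper headers are assumed:
 *
 *	__u8 dmac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
 *
 *	// overwrite the destination MAC in place; a flags value of 0
 *	// leaves skb->csum and skb->hash untouched
 *	bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest),
 *			    dmac, ETH_ALEN, 0);
 */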
   1701 
   1702 BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
   1703 	   void *, to, u32, len)
   1704 {
   1705 	void *ptr;
   1706 
   1707 	if (unlikely(offset > 0xffff))
   1708 		goto err_clear;
   1709 
   1710 	ptr = skb_header_pointer(skb, offset, len, to);
   1711 	if (unlikely(!ptr))
   1712 		goto err_clear;
   1713 	if (ptr != to)
   1714 		memcpy(to, ptr, len);
   1715 
   1716 	return 0;
   1717 err_clear:
   1718 	memset(to, 0, len);
   1719 	return -EFAULT;
   1720 }
   1721 
   1722 static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
   1723 	.func		= bpf_skb_load_bytes,
   1724 	.gpl_only	= false,
   1725 	.ret_type	= RET_INTEGER,
   1726 	.arg1_type	= ARG_PTR_TO_CTX,
   1727 	.arg2_type	= ARG_ANYTHING,
   1728 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
   1729 	.arg4_type	= ARG_CONST_SIZE,
   1730 };
   1731 
   1732 BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
   1733 	   u32, offset, void *, to, u32, len, u32, start_header)
   1734 {
   1735 	u8 *end = skb_tail_pointer(skb);
   1736 	u8 *net = skb_network_header(skb);
   1737 	u8 *mac = skb_mac_header(skb);
   1738 	u8 *ptr;
   1739 
   1740 	if (unlikely(offset > 0xffff || len > (end - mac)))
   1741 		goto err_clear;
   1742 
   1743 	switch (start_header) {
   1744 	case BPF_HDR_START_MAC:
   1745 		ptr = mac + offset;
   1746 		break;
   1747 	case BPF_HDR_START_NET:
   1748 		ptr = net + offset;
   1749 		break;
   1750 	default:
   1751 		goto err_clear;
   1752 	}
   1753 
   1754 	if (likely(ptr >= mac && ptr + len <= end)) {
   1755 		memcpy(to, ptr, len);
   1756 		return 0;
   1757 	}
   1758 
   1759 err_clear:
   1760 	memset(to, 0, len);
   1761 	return -EFAULT;
   1762 }
   1763 
   1764 static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
   1765 	.func		= bpf_skb_load_bytes_relative,
   1766 	.gpl_only	= false,
   1767 	.ret_type	= RET_INTEGER,
   1768 	.arg1_type	= ARG_PTR_TO_CTX,
   1769 	.arg2_type	= ARG_ANYTHING,
   1770 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
   1771 	.arg4_type	= ARG_CONST_SIZE,
   1772 	.arg5_type	= ARG_ANYTHING,
   1773 };
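
/* Sketch of the relative variant from a tc BPF-C program (illustration only):
 * read the IPv4 TTL anchored at the network header, so any VLAN or tunnel
 * headers in front of it do not change the offset:
 *
 *	__u8 ttl = 0;
 *
 *	bpf_skb_load_bytes_relative(skb, offsetof(struct iphdr, ttl),
 *				    &ttl, sizeof(ttl), BPF_HDR_START_NET);
 */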
   1774 
   1775 BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
   1776 {
    1777 	/* The idea is the following: should the needed direct read/write
    1778 	 * test fail at runtime, we can pull in more data and redo the
    1779 	 * test, since implicitly we invalidate the previous checks here.
    1780 	 *
    1781 	 * Or, since we know how much we need to make readable/writable,
    1782 	 * this can be done once at the start of the program for the
    1783 	 * direct access case. By this we overcome the limitation of only
    1784 	 * the current headroom being accessible.
    1785 	 */
   1786 	return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
   1787 }
   1788 
   1789 static const struct bpf_func_proto bpf_skb_pull_data_proto = {
   1790 	.func		= bpf_skb_pull_data,
   1791 	.gpl_only	= false,
   1792 	.ret_type	= RET_INTEGER,
   1793 	.arg1_type	= ARG_PTR_TO_CTX,
   1794 	.arg2_type	= ARG_ANYTHING,
   1795 };
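
/* The pattern described above, as a hedged sketch from a tc BPF-C program
 * (skb is the struct __sk_buff context; sizes are illustrative):
 *
 *	struct iphdr *iph;
 *	void *data, *data_end;
 *
 *	if (bpf_skb_pull_data(skb, ETH_HLEN + sizeof(*iph)))
 *		return TC_ACT_OK;
 *	// all packet pointers derived before the call are invalid now,
 *	// so data/data_end must be re-read and the bounds re-checked
 *	data     = (void *)(long)skb->data;
 *	data_end = (void *)(long)skb->data_end;
 *	iph = data + ETH_HLEN;
 *	if ((void *)(iph + 1) > data_end)
 *		return TC_ACT_OK;
 */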
   1796 
   1797 BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
   1798 {
   1799 	return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
   1800 }
   1801 
   1802 static const struct bpf_func_proto bpf_sk_fullsock_proto = {
   1803 	.func		= bpf_sk_fullsock,
   1804 	.gpl_only	= false,
   1805 	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
   1806 	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
   1807 };
   1808 
   1809 static inline int sk_skb_try_make_writable(struct sk_buff *skb,
   1810 					   unsigned int write_len)
   1811 {
   1812 	int err = __bpf_try_make_writable(skb, write_len);
   1813 
   1814 	bpf_compute_data_end_sk_skb(skb);
   1815 	return err;
   1816 }
   1817 
   1818 BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
   1819 {
    1820 	/* The idea is the following: should the needed direct read/write
    1821 	 * test fail at runtime, we can pull in more data and redo the
    1822 	 * test, since implicitly we invalidate the previous checks here.
    1823 	 *
    1824 	 * Or, since we know how much we need to make readable/writable,
    1825 	 * this can be done once at the start of the program for the
    1826 	 * direct access case. By this we overcome the limitation of only
    1827 	 * the current headroom being accessible.
    1828 	 */
   1829 	return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
   1830 }
   1831 
   1832 static const struct bpf_func_proto sk_skb_pull_data_proto = {
   1833 	.func		= sk_skb_pull_data,
   1834 	.gpl_only	= false,
   1835 	.ret_type	= RET_INTEGER,
   1836 	.arg1_type	= ARG_PTR_TO_CTX,
   1837 	.arg2_type	= ARG_ANYTHING,
   1838 };
   1839 
   1840 BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
   1841 	   u64, from, u64, to, u64, flags)
   1842 {
   1843 	__sum16 *ptr;
   1844 
   1845 	if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
   1846 		return -EINVAL;
   1847 	if (unlikely(offset > 0xffff || offset & 1))
   1848 		return -EFAULT;
   1849 	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
   1850 		return -EFAULT;
   1851 
   1852 	ptr = (__sum16 *)(skb->data + offset);
   1853 	switch (flags & BPF_F_HDR_FIELD_MASK) {
   1854 	case 0:
   1855 		if (unlikely(from != 0))
   1856 			return -EINVAL;
   1857 
   1858 		csum_replace_by_diff(ptr, to);
   1859 		break;
   1860 	case 2:
   1861 		csum_replace2(ptr, from, to);
   1862 		break;
   1863 	case 4:
   1864 		csum_replace4(ptr, from, to);
   1865 		break;
   1866 	default:
   1867 		return -EINVAL;
   1868 	}
   1869 
   1870 	return 0;
   1871 }
   1872 
   1873 static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
   1874 	.func		= bpf_l3_csum_replace,
   1875 	.gpl_only	= false,
   1876 	.ret_type	= RET_INTEGER,
   1877 	.arg1_type	= ARG_PTR_TO_CTX,
   1878 	.arg2_type	= ARG_ANYTHING,
   1879 	.arg3_type	= ARG_ANYTHING,
   1880 	.arg4_type	= ARG_ANYTHING,
   1881 	.arg5_type	= ARG_ANYTHING,
   1882 };
   1883 
   1884 BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
   1885 	   u64, from, u64, to, u64, flags)
   1886 {
   1887 	bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
   1888 	bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
   1889 	bool do_mforce = flags & BPF_F_MARK_ENFORCE;
   1890 	__sum16 *ptr;
   1891 
   1892 	if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
   1893 			       BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
   1894 		return -EINVAL;
   1895 	if (unlikely(offset > 0xffff || offset & 1))
   1896 		return -EFAULT;
   1897 	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
   1898 		return -EFAULT;
   1899 
   1900 	ptr = (__sum16 *)(skb->data + offset);
   1901 	if (is_mmzero && !do_mforce && !*ptr)
   1902 		return 0;
   1903 
   1904 	switch (flags & BPF_F_HDR_FIELD_MASK) {
   1905 	case 0:
   1906 		if (unlikely(from != 0))
   1907 			return -EINVAL;
   1908 
   1909 		inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
   1910 		break;
   1911 	case 2:
   1912 		inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
   1913 		break;
   1914 	case 4:
   1915 		inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
   1916 		break;
   1917 	default:
   1918 		return -EINVAL;
   1919 	}
   1920 
   1921 	if (is_mmzero && !*ptr)
   1922 		*ptr = CSUM_MANGLED_0;
   1923 	return 0;
   1924 }
   1925 
   1926 static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
   1927 	.func		= bpf_l4_csum_replace,
   1928 	.gpl_only	= false,
   1929 	.ret_type	= RET_INTEGER,
   1930 	.arg1_type	= ARG_PTR_TO_CTX,
   1931 	.arg2_type	= ARG_ANYTHING,
   1932 	.arg3_type	= ARG_ANYTHING,
   1933 	.arg4_type	= ARG_ANYTHING,
   1934 	.arg5_type	= ARG_ANYTHING,
   1935 };
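
/* A typical combination of the two csum helpers with the store helper, shown
 * as a sketch from a tc BPF-C program rewriting the IPv4 destination address
 * of a TCP packet (assumes no IP options; new_ip is an arbitrary example):
 *
 *	__be32 old_ip, new_ip = bpf_htonl(0x0a000001);
 *	const int ip_dst = ETH_HLEN + offsetof(struct iphdr, daddr);
 *	const int ip_csum = ETH_HLEN + offsetof(struct iphdr, check);
 *	const int tcp_csum = ETH_HLEN + sizeof(struct iphdr) +
 *			     offsetof(struct tcphdr, check);
 *
 *	bpf_skb_load_bytes(skb, ip_dst, &old_ip, sizeof(old_ip));
 *	// daddr is part of the TCP pseudo header, hence BPF_F_PSEUDO_HDR;
 *	// the low flag bits carry the field size (4 bytes here)
 *	bpf_l4_csum_replace(skb, tcp_csum, old_ip, new_ip,
 *			    BPF_F_PSEUDO_HDR | sizeof(new_ip));
 *	bpf_l3_csum_replace(skb, ip_csum, old_ip, new_ip, sizeof(new_ip));
 *	bpf_skb_store_bytes(skb, ip_dst, &new_ip, sizeof(new_ip), 0);
 */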
   1936 
   1937 BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
   1938 	   __be32 *, to, u32, to_size, __wsum, seed)
   1939 {
   1940 	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
   1941 	u32 diff_size = from_size + to_size;
   1942 	int i, j = 0;
   1943 
   1944 	/* This is quite flexible, some examples:
   1945 	 *
   1946 	 * from_size == 0, to_size > 0,  seed := csum --> pushing data
   1947 	 * from_size > 0,  to_size == 0, seed := csum --> pulling data
   1948 	 * from_size > 0,  to_size > 0,  seed := 0    --> diffing data
   1949 	 *
   1950 	 * Even for diffing, from_size and to_size don't need to be equal.
   1951 	 */
   1952 	if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
   1953 		     diff_size > sizeof(sp->diff)))
   1954 		return -EINVAL;
   1955 
   1956 	for (i = 0; i < from_size / sizeof(__be32); i++, j++)
   1957 		sp->diff[j] = ~from[i];
   1958 	for (i = 0; i <   to_size / sizeof(__be32); i++, j++)
   1959 		sp->diff[j] = to[i];
   1960 
   1961 	return csum_partial(sp->diff, diff_size, seed);
   1962 }
   1963 
   1964 static const struct bpf_func_proto bpf_csum_diff_proto = {
   1965 	.func		= bpf_csum_diff,
   1966 	.gpl_only	= false,
   1967 	.pkt_access	= true,
   1968 	.ret_type	= RET_INTEGER,
   1969 	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
   1970 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
   1971 	.arg3_type	= ARG_PTR_TO_MEM_OR_NULL,
   1972 	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
   1973 	.arg5_type	= ARG_ANYTHING,
   1974 };
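
/* Sketch of the push/pull/diff semantics described above, from a tc BPF-C
 * program: fold a 16 byte IPv6 daddr rewrite into a single checksum delta
 * and feed it to bpf_l4_csum_replace() (new_dst is left zeroed purely for
 * illustration; no IPv6 extension headers are assumed):
 *
 *	struct in6_addr old_dst, new_dst = { };
 *	__s64 diff;
 *
 *	bpf_skb_load_bytes(skb, ETH_HLEN + offsetof(struct ipv6hdr, daddr),
 *			   &old_dst, sizeof(old_dst));
 *	diff = bpf_csum_diff((__be32 *)&old_dst, sizeof(old_dst),
 *			     (__be32 *)&new_dst, sizeof(new_dst), 0);
 *	// size bits of 0 mean "apply a precomputed diff" (from must be 0)
 *	bpf_l4_csum_replace(skb, ETH_HLEN + sizeof(struct ipv6hdr) +
 *			    offsetof(struct tcphdr, check), 0, diff,
 *			    BPF_F_PSEUDO_HDR);
 *	// the new address itself still has to be written with
 *	// bpf_skb_store_bytes()
 */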
   1975 
   1976 BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
   1977 {
   1978 	/* The interface is to be used in combination with bpf_csum_diff()
   1979 	 * for direct packet writes. csum rotation for alignment as well
   1980 	 * as emulating csum_sub() can be done from the eBPF program.
   1981 	 */
   1982 	if (skb->ip_summed == CHECKSUM_COMPLETE)
   1983 		return (skb->csum = csum_add(skb->csum, csum));
   1984 
   1985 	return -ENOTSUPP;
   1986 }
   1987 
   1988 static const struct bpf_func_proto bpf_csum_update_proto = {
   1989 	.func		= bpf_csum_update,
   1990 	.gpl_only	= false,
   1991 	.ret_type	= RET_INTEGER,
   1992 	.arg1_type	= ARG_PTR_TO_CTX,
   1993 	.arg2_type	= ARG_ANYTHING,
   1994 };
   1995 
   1996 static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
   1997 {
   1998 	return dev_forward_skb(dev, skb);
   1999 }
   2000 
   2001 static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
   2002 				      struct sk_buff *skb)
   2003 {
   2004 	int ret = ____dev_forward_skb(dev, skb);
   2005 
   2006 	if (likely(!ret)) {
   2007 		skb->dev = dev;
   2008 		ret = netif_rx(skb);
   2009 	}
   2010 
   2011 	return ret;
   2012 }
   2013 
   2014 static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
   2015 {
   2016 	int ret;
   2017 
   2018 	if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
   2019 		net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
   2020 		kfree_skb(skb);
   2021 		return -ENETDOWN;
   2022 	}
   2023 
   2024 	skb->dev = dev;
   2025 
   2026 	__this_cpu_inc(xmit_recursion);
   2027 	ret = dev_queue_xmit(skb);
   2028 	__this_cpu_dec(xmit_recursion);
   2029 
   2030 	return ret;
   2031 }
   2032 
   2033 static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
   2034 				 u32 flags)
   2035 {
   2036 	unsigned int mlen = skb_network_offset(skb);
   2037 
   2038 	if (mlen) {
   2039 		__skb_pull(skb, mlen);
   2040 
   2041 		/* At ingress, the mac header has already been pulled once.
    2042 		 * At egress, skb_postpull_rcsum() has to be done in case
    2043 		 * the skb originated from ingress (i.e. is a forwarded skb)
    2044 		 * to ensure that rcsum starts at the net header.
   2045 		 */
   2046 		if (!skb_at_tc_ingress(skb))
   2047 			skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
   2048 	}
   2049 	skb_pop_mac_header(skb);
   2050 	skb_reset_mac_len(skb);
   2051 	return flags & BPF_F_INGRESS ?
   2052 	       __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
   2053 }
   2054 
   2055 static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
   2056 				 u32 flags)
   2057 {
   2058 	/* Verify that a link layer header is carried */
   2059 	if (unlikely(skb->mac_header >= skb->network_header)) {
   2060 		kfree_skb(skb);
   2061 		return -ERANGE;
   2062 	}
   2063 
   2064 	bpf_push_mac_rcsum(skb);
   2065 	return flags & BPF_F_INGRESS ?
   2066 	       __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
   2067 }
   2068 
   2069 static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
   2070 			  u32 flags)
   2071 {
   2072 	if (dev_is_mac_header_xmit(dev))
   2073 		return __bpf_redirect_common(skb, dev, flags);
   2074 	else
   2075 		return __bpf_redirect_no_mac(skb, dev, flags);
   2076 }
   2077 
   2078 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
   2079 {
   2080 	struct net_device *dev;
   2081 	struct sk_buff *clone;
   2082 	int ret;
   2083 
   2084 	if (unlikely(flags & ~(BPF_F_INGRESS)))
   2085 		return -EINVAL;
   2086 
   2087 	dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
   2088 	if (unlikely(!dev))
   2089 		return -EINVAL;
   2090 
   2091 	clone = skb_clone(skb, GFP_ATOMIC);
   2092 	if (unlikely(!clone))
   2093 		return -ENOMEM;
   2094 
   2095 	/* For direct write, we need to keep the invariant that the skbs
    2096 	 * we're dealing with are uncloned. Should uncloning fail here,
    2097 	 * we need to free the just generated clone so that the skb
    2098 	 * becomes uncloned again.
   2099 	 */
   2100 	ret = bpf_try_make_head_writable(skb);
   2101 	if (unlikely(ret)) {
   2102 		kfree_skb(clone);
   2103 		return -ENOMEM;
   2104 	}
   2105 
   2106 	return __bpf_redirect(clone, dev, flags);
   2107 }
   2108 
   2109 static const struct bpf_func_proto bpf_clone_redirect_proto = {
   2110 	.func           = bpf_clone_redirect,
   2111 	.gpl_only       = false,
   2112 	.ret_type       = RET_INTEGER,
   2113 	.arg1_type      = ARG_PTR_TO_CTX,
   2114 	.arg2_type      = ARG_ANYTHING,
   2115 	.arg3_type      = ARG_ANYTHING,
   2116 };
   2117 
   2118 DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
   2119 EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
   2120 
   2121 BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
   2122 {
   2123 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
   2124 
   2125 	if (unlikely(flags & ~(BPF_F_INGRESS)))
   2126 		return TC_ACT_SHOT;
   2127 
   2128 	ri->ifindex = ifindex;
   2129 	ri->flags = flags;
   2130 
   2131 	return TC_ACT_REDIRECT;
   2132 }
   2133 
   2134 int skb_do_redirect(struct sk_buff *skb)
   2135 {
   2136 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
   2137 	struct net_device *dev;
   2138 
   2139 	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
   2140 	ri->ifindex = 0;
   2141 	if (unlikely(!dev)) {
   2142 		kfree_skb(skb);
   2143 		return -EINVAL;
   2144 	}
   2145 
   2146 	return __bpf_redirect(skb, dev, ri->flags);
   2147 }
   2148 
   2149 static const struct bpf_func_proto bpf_redirect_proto = {
   2150 	.func           = bpf_redirect,
   2151 	.gpl_only       = false,
   2152 	.ret_type       = RET_INTEGER,
   2153 	.arg1_type      = ARG_ANYTHING,
   2154 	.arg2_type      = ARG_ANYTHING,
   2155 };
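
/* Note how the work is split: bpf_redirect() only records ifindex/flags in
 * the per-CPU bpf_redirect_info and returns TC_ACT_REDIRECT; acting on that
 * verdict, the tc layer then calls skb_do_redirect() above. A sketch from a
 * hypothetical tc classifier (ifindex values and the condition are made up):
 *
 *	if (to_uplink)
 *		return bpf_redirect(UPLINK_IFINDEX, 0);
 *	// or inject into the peer device's receive path instead
 *	return bpf_redirect(PEER_IFINDEX, BPF_F_INGRESS);
 */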
   2156 
   2157 BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
   2158 {
   2159 	msg->apply_bytes = bytes;
   2160 	return 0;
   2161 }
   2162 
   2163 static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
   2164 	.func           = bpf_msg_apply_bytes,
   2165 	.gpl_only       = false,
   2166 	.ret_type       = RET_INTEGER,
   2167 	.arg1_type	= ARG_PTR_TO_CTX,
   2168 	.arg2_type      = ARG_ANYTHING,
   2169 };
   2170 
   2171 BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
   2172 {
   2173 	msg->cork_bytes = bytes;
   2174 	return 0;
   2175 }
   2176 
   2177 static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
   2178 	.func           = bpf_msg_cork_bytes,
   2179 	.gpl_only       = false,
   2180 	.ret_type       = RET_INTEGER,
   2181 	.arg1_type	= ARG_PTR_TO_CTX,
   2182 	.arg2_type      = ARG_ANYTHING,
   2183 };
   2184 
   2185 BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
   2186 	   u32, end, u64, flags)
   2187 {
   2188 	u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start;
   2189 	u32 first_sge, last_sge, i, shift, bytes_sg_total;
   2190 	struct scatterlist *sge;
   2191 	u8 *raw, *to, *from;
   2192 	struct page *page;
   2193 
   2194 	if (unlikely(flags || end <= start))
   2195 		return -EINVAL;
   2196 
   2197 	/* First find the starting scatterlist element */
   2198 	i = msg->sg.start;
   2199 	do {
   2200 		len = sk_msg_elem(msg, i)->length;
   2201 		if (start < offset + len)
   2202 			break;
   2203 		offset += len;
   2204 		sk_msg_iter_var_next(i);
   2205 	} while (i != msg->sg.end);
   2206 
   2207 	if (unlikely(start >= offset + len))
   2208 		return -EINVAL;
   2209 
   2210 	first_sge = i;
   2211 	/* The start may point into the sg element so we need to also
   2212 	 * account for the headroom.
   2213 	 */
   2214 	bytes_sg_total = start - offset + bytes;
   2215 	if (!msg->sg.copy[i] && bytes_sg_total <= len)
   2216 		goto out;
   2217 
   2218 	/* At this point we need to linearize multiple scatterlist
   2219 	 * elements or a single shared page. Either way we need to
   2220 	 * copy into a linear buffer exclusively owned by BPF. Then
   2221 	 * place the buffer in the scatterlist and fixup the original
   2222 	 * entries by removing the entries now in the linear buffer
   2223 	 * and shifting the remaining entries. For now we do not try
   2224 	 * to copy partial entries to avoid complexity of running out
   2225 	 * of sg_entry slots. The downside is reading a single byte
   2226 	 * will copy the entire sg entry.
   2227 	 */
   2228 	do {
   2229 		copy += sk_msg_elem(msg, i)->length;
   2230 		sk_msg_iter_var_next(i);
   2231 		if (bytes_sg_total <= copy)
   2232 			break;
   2233 	} while (i != msg->sg.end);
   2234 	last_sge = i;
   2235 
   2236 	if (unlikely(bytes_sg_total > copy))
   2237 		return -EINVAL;
   2238 
   2239 	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
   2240 			   get_order(copy));
   2241 	if (unlikely(!page))
   2242 		return -ENOMEM;
   2243 
   2244 	raw = page_address(page);
   2245 	i = first_sge;
   2246 	do {
   2247 		sge = sk_msg_elem(msg, i);
   2248 		from = sg_virt(sge);
   2249 		len = sge->length;
   2250 		to = raw + poffset;
   2251 
   2252 		memcpy(to, from, len);
   2253 		poffset += len;
   2254 		sge->length = 0;
   2255 		put_page(sg_page(sge));
   2256 
   2257 		sk_msg_iter_var_next(i);
   2258 	} while (i != last_sge);
   2259 
   2260 	sg_set_page(&msg->sg.data[first_sge], page, copy, 0);
   2261 
   2262 	/* To repair sg ring we need to shift entries. If we only
   2263 	 * had a single entry though we can just replace it and
   2264 	 * be done. Otherwise walk the ring and shift the entries.
   2265 	 */
   2266 	WARN_ON_ONCE(last_sge == first_sge);
   2267 	shift = last_sge > first_sge ?
   2268 		last_sge - first_sge - 1 :
   2269 		MAX_SKB_FRAGS - first_sge + last_sge - 1;
   2270 	if (!shift)
   2271 		goto out;
   2272 
   2273 	i = first_sge;
   2274 	sk_msg_iter_var_next(i);
   2275 	do {
   2276 		u32 move_from;
   2277 
   2278 		if (i + shift >= MAX_MSG_FRAGS)
   2279 			move_from = i + shift - MAX_MSG_FRAGS;
   2280 		else
   2281 			move_from = i + shift;
   2282 		if (move_from == msg->sg.end)
   2283 			break;
   2284 
   2285 		msg->sg.data[i] = msg->sg.data[move_from];
   2286 		msg->sg.data[move_from].length = 0;
   2287 		msg->sg.data[move_from].page_link = 0;
   2288 		msg->sg.data[move_from].offset = 0;
   2289 		sk_msg_iter_var_next(i);
   2290 	} while (1);
   2291 
   2292 	msg->sg.end = msg->sg.end - shift > msg->sg.end ?
   2293 		      msg->sg.end - shift + MAX_MSG_FRAGS :
   2294 		      msg->sg.end - shift;
   2295 out:
   2296 	msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
   2297 	msg->data_end = msg->data + bytes;
   2298 	return 0;
   2299 }
   2300 
   2301 static const struct bpf_func_proto bpf_msg_pull_data_proto = {
   2302 	.func		= bpf_msg_pull_data,
   2303 	.gpl_only	= false,
   2304 	.ret_type	= RET_INTEGER,
   2305 	.arg1_type	= ARG_PTR_TO_CTX,
   2306 	.arg2_type	= ARG_ANYTHING,
   2307 	.arg3_type	= ARG_ANYTHING,
   2308 	.arg4_type	= ARG_ANYTHING,
   2309 };
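
/* Typical caller side for the helper above, sketched from an SK_MSG BPF-C
 * program (the 20 byte header length is illustrative): make the start of the
 * message directly readable before parsing it:
 *
 *	if (bpf_msg_pull_data(msg, 0, 20, 0))
 *		return SK_DROP;
 *	// msg->data/data_end are recomputed by the helper
 *	if (msg->data + 20 > msg->data_end)
 *		return SK_DROP;
 */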
   2310 
   2311 BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
   2312 	   u32, len, u64, flags)
   2313 {
   2314 	struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
   2315 	u32 new, i = 0, l, space, copy = 0, offset = 0;
   2316 	u8 *raw, *to, *from;
   2317 	struct page *page;
   2318 
   2319 	if (unlikely(flags))
   2320 		return -EINVAL;
   2321 
   2322 	/* First find the starting scatterlist element */
   2323 	i = msg->sg.start;
   2324 	do {
   2325 		l = sk_msg_elem(msg, i)->length;
   2326 
   2327 		if (start < offset + l)
   2328 			break;
   2329 		offset += l;
   2330 		sk_msg_iter_var_next(i);
   2331 	} while (i != msg->sg.end);
   2332 
   2333 	if (start >= offset + l)
   2334 		return -EINVAL;
   2335 
   2336 	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
   2337 
    2338 	/* If no space is available we will fall back to a copy; we need
    2339 	 * at least one scatterlist elem available to push data into when
    2340 	 * start aligns to the beginning of an element, or two when it
    2341 	 * falls inside an element. We handle the start equals offset
    2342 	 * case because it's the common case for inserting a
   2343 	 * header.
   2344 	 */
   2345 	if (!space || (space == 1 && start != offset))
   2346 		copy = msg->sg.data[i].length;
   2347 
   2348 	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
   2349 			   get_order(copy + len));
   2350 	if (unlikely(!page))
   2351 		return -ENOMEM;
   2352 
   2353 	if (copy) {
   2354 		int front, back;
   2355 
   2356 		raw = page_address(page);
   2357 
   2358 		psge = sk_msg_elem(msg, i);
   2359 		front = start - offset;
   2360 		back = psge->length - front;
   2361 		from = sg_virt(psge);
   2362 
   2363 		if (front)
   2364 			memcpy(raw, from, front);
   2365 
   2366 		if (back) {
   2367 			from += front;
   2368 			to = raw + front + len;
   2369 
   2370 			memcpy(to, from, back);
   2371 		}
   2372 
   2373 		put_page(sg_page(psge));
   2374 	} else if (start - offset) {
   2375 		psge = sk_msg_elem(msg, i);
   2376 		rsge = sk_msg_elem_cpy(msg, i);
   2377 
   2378 		psge->length = start - offset;
   2379 		rsge.length -= psge->length;
   2380 		rsge.offset += start;
   2381 
   2382 		sk_msg_iter_var_next(i);
   2383 		sg_unmark_end(psge);
   2384 		sk_msg_iter_next(msg, end);
   2385 	}
   2386 
   2387 	/* Slot(s) to place newly allocated data */
   2388 	new = i;
   2389 
   2390 	/* Shift one or two slots as needed */
   2391 	if (!copy) {
   2392 		sge = sk_msg_elem_cpy(msg, i);
   2393 
   2394 		sk_msg_iter_var_next(i);
   2395 		sg_unmark_end(&sge);
   2396 		sk_msg_iter_next(msg, end);
   2397 
   2398 		nsge = sk_msg_elem_cpy(msg, i);
   2399 		if (rsge.length) {
   2400 			sk_msg_iter_var_next(i);
   2401 			nnsge = sk_msg_elem_cpy(msg, i);
   2402 		}
   2403 
   2404 		while (i != msg->sg.end) {
   2405 			msg->sg.data[i] = sge;
   2406 			sge = nsge;
   2407 			sk_msg_iter_var_next(i);
   2408 			if (rsge.length) {
   2409 				nsge = nnsge;
   2410 				nnsge = sk_msg_elem_cpy(msg, i);
   2411 			} else {
   2412 				nsge = sk_msg_elem_cpy(msg, i);
   2413 			}
   2414 		}
   2415 	}
   2416 
   2417 	/* Place newly allocated data buffer */
   2418 	sk_mem_charge(msg->sk, len);
   2419 	msg->sg.size += len;
   2420 	msg->sg.copy[new] = false;
   2421 	sg_set_page(&msg->sg.data[new], page, len + copy, 0);
   2422 	if (rsge.length) {
   2423 		get_page(sg_page(&rsge));
   2424 		sk_msg_iter_var_next(new);
   2425 		msg->sg.data[new] = rsge;
   2426 	}
   2427 
   2428 	sk_msg_compute_data_pointers(msg);
   2429 	return 0;
   2430 }
   2431 
   2432 static const struct bpf_func_proto bpf_msg_push_data_proto = {
   2433 	.func		= bpf_msg_push_data,
   2434 	.gpl_only	= false,
   2435 	.ret_type	= RET_INTEGER,
   2436 	.arg1_type	= ARG_PTR_TO_CTX,
   2437 	.arg2_type	= ARG_ANYTHING,
   2438 	.arg3_type	= ARG_ANYTHING,
   2439 	.arg4_type	= ARG_ANYTHING,
   2440 };
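
/* Sketch of inserting an 8 byte application header at the front of the
 * message from an SK_MSG BPF-C program (sizes are illustrative):
 *
 *	if (bpf_msg_push_data(msg, 0, 8, 0))
 *		return SK_DROP;
 *	// make sure the fresh bytes are directly addressable before
 *	// writing them
 *	if (bpf_msg_pull_data(msg, 0, 8, 0))
 *		return SK_DROP;
 *	if (msg->data + 8 > msg->data_end)
 *		return SK_DROP;
 *	__builtin_memset(msg->data, 0, 8);
 */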
   2441 
   2442 static void sk_msg_shift_left(struct sk_msg *msg, int i)
   2443 {
   2444 	int prev;
   2445 
   2446 	do {
   2447 		prev = i;
   2448 		sk_msg_iter_var_next(i);
   2449 		msg->sg.data[prev] = msg->sg.data[i];
   2450 	} while (i != msg->sg.end);
   2451 
   2452 	sk_msg_iter_prev(msg, end);
   2453 }
   2454 
   2455 static void sk_msg_shift_right(struct sk_msg *msg, int i)
   2456 {
   2457 	struct scatterlist tmp, sge;
   2458 
   2459 	sk_msg_iter_next(msg, end);
   2460 	sge = sk_msg_elem_cpy(msg, i);
   2461 	sk_msg_iter_var_next(i);
   2462 	tmp = sk_msg_elem_cpy(msg, i);
   2463 
   2464 	while (i != msg->sg.end) {
   2465 		msg->sg.data[i] = sge;
   2466 		sk_msg_iter_var_next(i);
   2467 		sge = tmp;
   2468 		tmp = sk_msg_elem_cpy(msg, i);
   2469 	}
   2470 }
   2471 
   2472 BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
   2473 	   u32, len, u64, flags)
   2474 {
   2475 	u32 i = 0, l, space, offset = 0;
   2476 	u64 last = start + len;
   2477 	int pop;
   2478 
   2479 	if (unlikely(flags))
   2480 		return -EINVAL;
   2481 
   2482 	/* First find the starting scatterlist element */
   2483 	i = msg->sg.start;
   2484 	do {
   2485 		l = sk_msg_elem(msg, i)->length;
   2486 
   2487 		if (start < offset + l)
   2488 			break;
   2489 		offset += l;
   2490 		sk_msg_iter_var_next(i);
   2491 	} while (i != msg->sg.end);
   2492 
   2493 	/* Bounds checks: start and pop must be inside message */
   2494 	if (start >= offset + l || last >= msg->sg.size)
   2495 		return -EINVAL;
   2496 
   2497 	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
   2498 
   2499 	pop = len;
   2500 	/* --------------| offset
   2501 	 * -| start      |-------- len -------|
   2502 	 *
   2503 	 *  |----- a ----|-------- pop -------|----- b ----|
   2504 	 *  |______________________________________________| length
   2505 	 *
   2506 	 *
   2507 	 * a:   region at front of scatter element to save
    2508 	 * b:   region at back of scatter element to save when length > a + pop
    2509 	 * pop: region to pop from element; same as the input 'pop', it will be
    2510 	 *      decremented below per iteration.
   2511 	 *
    2512 	 * Two top-level cases to handle when start != offset: first, b is
    2513 	 * non-zero, and second, b is zero, corresponding to when a pop spans
    2514 	 * more than one element.
    2515 	 *
    2516 	 * Then, if b is non-zero AND there is no space, allocate space and
    2517 	 * compact the a and b regions into a page. If there is space, shift
    2518 	 * the ring to the right, freeing the next element in the ring to
    2519 	 * place b, leaving a untouched except to reduce its length.
   2520 	 */
   2521 	if (start != offset) {
   2522 		struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
    2523 		int a = start - offset;
   2524 		int b = sge->length - pop - a;
   2525 
   2526 		sk_msg_iter_var_next(i);
   2527 
   2528 		if (pop < sge->length - a) {
   2529 			if (space) {
   2530 				sge->length = a;
   2531 				sk_msg_shift_right(msg, i);
   2532 				nsge = sk_msg_elem(msg, i);
   2533 				get_page(sg_page(sge));
   2534 				sg_set_page(nsge,
   2535 					    sg_page(sge),
   2536 					    b, sge->offset + pop + a);
   2537 			} else {
   2538 				struct page *page, *orig;
   2539 				u8 *to, *from;
   2540 
   2541 				page = alloc_pages(__GFP_NOWARN |
   2542 						   __GFP_COMP   | GFP_ATOMIC,
   2543 						   get_order(a + b));
   2544 				if (unlikely(!page))
   2545 					return -ENOMEM;
   2546 
   2547 				sge->length = a;
   2548 				orig = sg_page(sge);
   2549 				from = sg_virt(sge);
   2550 				to = page_address(page);
   2551 				memcpy(to, from, a);
   2552 				memcpy(to + a, from + a + pop, b);
   2553 				sg_set_page(sge, page, a + b, 0);
   2554 				put_page(orig);
   2555 			}
   2556 			pop = 0;
   2557 		} else if (pop >= sge->length - a) {
    2558 			pop -= (sge->length - a);
    2559 			sge->length = a;
   2560 		}
   2561 	}
   2562 
   2563 	/* From above the current layout _must_ be as follows,
   2564 	 *
   2565 	 * -| offset
   2566 	 * -| start
   2567 	 *
   2568 	 *  |---- pop ---|---------------- b ------------|
   2569 	 *  |____________________________________________| length
   2570 	 *
   2571 	 * Offset and start of the current msg elem are equal because in the
   2572 	 * previous case we handled offset != start and either consumed the
   2573 	 * entire element and advanced to the next element OR pop == 0.
   2574 	 *
   2575 	 * Two cases to handle here are first pop is less than the length
   2576 	 * leaving some remainder b above. Simply adjust the element's layout
   2577 	 * in this case. Or pop >= length of the element so that b = 0. In this
   2578 	 * case advance to next element decrementing pop.
   2579 	 */
   2580 	while (pop) {
   2581 		struct scatterlist *sge = sk_msg_elem(msg, i);
   2582 
   2583 		if (pop < sge->length) {
   2584 			sge->length -= pop;
   2585 			sge->offset += pop;
   2586 			pop = 0;
   2587 		} else {
   2588 			pop -= sge->length;
   2589 			sk_msg_shift_left(msg, i);
   2590 		}
   2591 		sk_msg_iter_var_next(i);
   2592 	}
   2593 
   2594 	sk_mem_uncharge(msg->sk, len - pop);
   2595 	msg->sg.size -= (len - pop);
   2596 	sk_msg_compute_data_pointers(msg);
   2597 	return 0;
   2598 }
   2599 
   2600 static const struct bpf_func_proto bpf_msg_pop_data_proto = {
   2601 	.func		= bpf_msg_pop_data,
   2602 	.gpl_only	= false,
   2603 	.ret_type	= RET_INTEGER,
   2604 	.arg1_type	= ARG_PTR_TO_CTX,
   2605 	.arg2_type	= ARG_ANYTHING,
   2606 	.arg3_type	= ARG_ANYTHING,
   2607 	.arg4_type	= ARG_ANYTHING,
   2608 };
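
/* Caller side sketch for the helper above, from an SK_MSG BPF-C program:
 * strip an 8 byte record header that the program has already parsed at the
 * start of the message (sizes are illustrative):
 *
 *	if (bpf_msg_pop_data(msg, 0, 8, 0))
 *		return SK_DROP;
 */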
   2609 
   2610 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
   2611 {
   2612 	return task_get_classid(skb);
   2613 }
   2614 
   2615 static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
   2616 	.func           = bpf_get_cgroup_classid,
   2617 	.gpl_only       = false,
   2618 	.ret_type       = RET_INTEGER,
   2619 	.arg1_type      = ARG_PTR_TO_CTX,
   2620 };
   2621 
   2622 BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
   2623 {
   2624 	return dst_tclassid(skb);
   2625 }
   2626 
   2627 static const struct bpf_func_proto bpf_get_route_realm_proto = {
   2628 	.func           = bpf_get_route_realm,
   2629 	.gpl_only       = false,
   2630 	.ret_type       = RET_INTEGER,
   2631 	.arg1_type      = ARG_PTR_TO_CTX,
   2632 };
   2633 
   2634 BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
   2635 {
   2636 	/* If skb_clear_hash() was called due to mangling, we can
   2637 	 * trigger SW recalculation here. Later access to hash
   2638 	 * can then use the inline skb->hash via context directly
   2639 	 * instead of calling this helper again.
   2640 	 */
   2641 	return skb_get_hash(skb);
   2642 }
   2643 
   2644 static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
   2645 	.func		= bpf_get_hash_recalc,
   2646 	.gpl_only	= false,
   2647 	.ret_type	= RET_INTEGER,
   2648 	.arg1_type	= ARG_PTR_TO_CTX,
   2649 };
   2650 
   2651 BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
   2652 {
    2653 	/* After all direct packet writes, this can be used once for
    2654 	 * triggering a lazy recalc on the next skb_get_hash() invocation.
   2655 	 */
   2656 	skb_clear_hash(skb);
   2657 	return 0;
   2658 }
   2659 
   2660 static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
   2661 	.func		= bpf_set_hash_invalid,
   2662 	.gpl_only	= false,
   2663 	.ret_type	= RET_INTEGER,
   2664 	.arg1_type	= ARG_PTR_TO_CTX,
   2665 };
   2666 
   2667 BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
   2668 {
   2669 	/* Set user specified hash as L4(+), so that it gets returned
   2670 	 * on skb_get_hash() call unless BPF prog later on triggers a
   2671 	 * skb_clear_hash().
   2672 	 */
   2673 	__skb_set_sw_hash(skb, hash, true);
   2674 	return 0;
   2675 }
   2676 
   2677 static const struct bpf_func_proto bpf_set_hash_proto = {
   2678 	.func		= bpf_set_hash,
   2679 	.gpl_only	= false,
   2680 	.ret_type	= RET_INTEGER,
   2681 	.arg1_type	= ARG_PTR_TO_CTX,
   2682 	.arg2_type	= ARG_ANYTHING,
   2683 };
   2684 
   2685 BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
   2686 	   u16, vlan_tci)
   2687 {
   2688 	int ret;
   2689 
   2690 	if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
   2691 		     vlan_proto != htons(ETH_P_8021AD)))
   2692 		vlan_proto = htons(ETH_P_8021Q);
   2693 
   2694 	bpf_push_mac_rcsum(skb);
   2695 	ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
   2696 	bpf_pull_mac_rcsum(skb);
   2697 
   2698 	bpf_compute_data_pointers(skb);
   2699 	return ret;
   2700 }
   2701 
   2702 static const struct bpf_func_proto bpf_skb_vlan_push_proto = {
   2703 	.func           = bpf_skb_vlan_push,
   2704 	.gpl_only       = false,
   2705 	.ret_type       = RET_INTEGER,
   2706 	.arg1_type      = ARG_PTR_TO_CTX,
   2707 	.arg2_type      = ARG_ANYTHING,
   2708 	.arg3_type      = ARG_ANYTHING,
   2709 };
   2710 
   2711 BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
   2712 {
   2713 	int ret;
   2714 
   2715 	bpf_push_mac_rcsum(skb);
   2716 	ret = skb_vlan_pop(skb);
   2717 	bpf_pull_mac_rcsum(skb);
   2718 
   2719 	bpf_compute_data_pointers(skb);
   2720 	return ret;
   2721 }
   2722 
   2723 static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
   2724 	.func           = bpf_skb_vlan_pop,
   2725 	.gpl_only       = false,
   2726 	.ret_type       = RET_INTEGER,
   2727 	.arg1_type      = ARG_PTR_TO_CTX,
   2728 };
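
/* Usage sketch for the two VLAN helpers from a tc BPF-C program (the VLAN id
 * is arbitrary):
 *
 *	// tag the packet; anything other than 802.1Q/802.1AD is forced
 *	// to 802.1Q by the helper
 *	bpf_skb_vlan_push(skb, bpf_htons(ETH_P_8021Q), 100);
 *
 *	// or strip the outermost tag again
 *	bpf_skb_vlan_pop(skb);
 */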
   2729 
   2730 static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
   2731 {
   2732 	/* Caller already did skb_cow() with len as headroom,
   2733 	 * so no need to do it here.
   2734 	 */
   2735 	skb_push(skb, len);
   2736 	memmove(skb->data, skb->data + len, off);
   2737 	memset(skb->data + off, 0, len);
   2738 
   2739 	/* No skb_postpush_rcsum(skb, skb->data + off, len)
   2740 	 * needed here as it does not change the skb->csum
   2741 	 * result for checksum complete when summing over
   2742 	 * zeroed blocks.
   2743 	 */
   2744 	return 0;
   2745 }
   2746 
   2747 static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
   2748 {
   2749 	/* skb_ensure_writable() is not needed here, as we're
   2750 	 * already working on an uncloned skb.
   2751 	 */
   2752 	if (unlikely(!pskb_may_pull(skb, off + len)))
   2753 		return -ENOMEM;
   2754 
   2755 	skb_postpull_rcsum(skb, skb->data + off, len);
   2756 	memmove(skb->data + len, skb->data, off);
   2757 	__skb_pull(skb, len);
   2758 
   2759 	return 0;
   2760 }
   2761 
   2762 static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
   2763 {
   2764 	bool trans_same = skb->transport_header == skb->network_header;
   2765 	int ret;
   2766 
   2767 	/* There's no need for __skb_push()/__skb_pull() pair to
   2768 	 * get to the start of the mac header as we're guaranteed
   2769 	 * to always start from here under eBPF.
   2770 	 */
   2771 	ret = bpf_skb_generic_push(skb, off, len);
   2772 	if (likely(!ret)) {
   2773 		skb->mac_header -= len;
   2774 		skb->network_header -= len;
   2775 		if (trans_same)
   2776 			skb->transport_header = skb->network_header;
   2777 	}
   2778 
   2779 	return ret;
   2780 }
   2781 
   2782 static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
   2783 {
   2784 	bool trans_same = skb->transport_header == skb->network_header;
   2785 	int ret;
   2786 
   2787 	/* Same here, __skb_push()/__skb_pull() pair not needed. */
   2788 	ret = bpf_skb_generic_pop(skb, off, len);
   2789 	if (likely(!ret)) {
   2790 		skb->mac_header += len;
   2791 		skb->network_header += len;
   2792 		if (trans_same)
   2793 			skb->transport_header = skb->network_header;
   2794 	}
   2795 
   2796 	return ret;
   2797 }
   2798 
   2799 static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
   2800 {
   2801 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
   2802 	u32 off = skb_mac_header_len(skb);
   2803 	int ret;
   2804 
   2805 	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
   2806 		return -ENOTSUPP;
   2807 
   2808 	ret = skb_cow(skb, len_diff);
   2809 	if (unlikely(ret < 0))
   2810 		return ret;
   2811 
   2812 	ret = bpf_skb_net_hdr_push(skb, off, len_diff);
   2813 	if (unlikely(ret < 0))
   2814 		return ret;
   2815 
   2816 	if (skb_is_gso(skb)) {
   2817 		struct skb_shared_info *shinfo = skb_shinfo(skb);
   2818 
   2819 		/* SKB_GSO_TCPV4 needs to be changed into
   2820 		 * SKB_GSO_TCPV6.
   2821 		 */
   2822 		if (shinfo->gso_type & SKB_GSO_TCPV4) {
   2823 			shinfo->gso_type &= ~SKB_GSO_TCPV4;
   2824 			shinfo->gso_type |=  SKB_GSO_TCPV6;
   2825 		}
   2826 
   2827 		/* Due to IPv6 header, MSS needs to be downgraded. */
   2828 		skb_decrease_gso_size(shinfo, len_diff);
   2829 		/* Header must be checked, and gso_segs recomputed. */
   2830 		shinfo->gso_type |= SKB_GSO_DODGY;
   2831 		shinfo->gso_segs = 0;
   2832 	}
   2833 
   2834 	skb->protocol = htons(ETH_P_IPV6);
   2835 	skb_clear_hash(skb);
   2836 
   2837 	return 0;
   2838 }
   2839 
   2840 static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
   2841 {
   2842 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
   2843 	u32 off = skb_mac_header_len(skb);
   2844 	int ret;
   2845 
   2846 	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
   2847 		return -ENOTSUPP;
   2848 
   2849 	ret = skb_unclone(skb, GFP_ATOMIC);
   2850 	if (unlikely(ret < 0))
   2851 		return ret;
   2852 
   2853 	ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
   2854 	if (unlikely(ret < 0))
   2855 		return ret;
   2856 
   2857 	if (skb_is_gso(skb)) {
   2858 		struct skb_shared_info *shinfo = skb_shinfo(skb);
   2859 
   2860 		/* SKB_GSO_TCPV6 needs to be changed into
   2861 		 * SKB_GSO_TCPV4.
   2862 		 */
   2863 		if (shinfo->gso_type & SKB_GSO_TCPV6) {
   2864 			shinfo->gso_type &= ~SKB_GSO_TCPV6;
   2865 			shinfo->gso_type |=  SKB_GSO_TCPV4;
   2866 		}
   2867 
   2868 		/* Due to IPv4 header, MSS can be upgraded. */
   2869 		skb_increase_gso_size(shinfo, len_diff);
   2870 		/* Header must be checked, and gso_segs recomputed. */
   2871 		shinfo->gso_type |= SKB_GSO_DODGY;
   2872 		shinfo->gso_segs = 0;
   2873 	}
   2874 
   2875 	skb->protocol = htons(ETH_P_IP);
   2876 	skb_clear_hash(skb);
   2877 
   2878 	return 0;
   2879 }
   2880 
   2881 static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
   2882 {
   2883 	__be16 from_proto = skb->protocol;
   2884 
   2885 	if (from_proto == htons(ETH_P_IP) &&
   2886 	      to_proto == htons(ETH_P_IPV6))
   2887 		return bpf_skb_proto_4_to_6(skb);
   2888 
   2889 	if (from_proto == htons(ETH_P_IPV6) &&
   2890 	      to_proto == htons(ETH_P_IP))
   2891 		return bpf_skb_proto_6_to_4(skb);
   2892 
   2893 	return -ENOTSUPP;
   2894 }
   2895 
   2896 BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
   2897 	   u64, flags)
   2898 {
   2899 	int ret;
   2900 
   2901 	if (unlikely(flags))
   2902 		return -EINVAL;
   2903 
    2904 	/* The general idea is that this helper does the basic groundwork
    2905 	 * needed for changing the protocol, and the eBPF program fills the
    2906 	 * rest in through bpf_skb_store_bytes(), bpf_lX_csum_replace()
    2907 	 * and other helpers, rather than passing a raw buffer here.
    2908 	 *
    2909 	 * The rationale is to keep this minimal and without a need to
    2910 	 * deal with raw packet data. E.g. even if we would pass buffers
    2911 	 * here, the program still needs to call the bpf_lX_csum_replace()
    2912 	 * helpers anyway. Plus, this way we also keep separation of
    2913 	 * concerns, since e.g. bpf_skb_store_bytes() should only take
    2914 	 * care of stores.
    2915 	 *
    2916 	 * Currently, additional options and extension header space are
    2917 	 * not supported, but the flags argument is reserved so we can
    2918 	 * adapt that. For offloads, we mark the packet as dodgy, so that
    2919 	 * headers need to be verified first.
   2920 	 */
   2921 	ret = bpf_skb_proto_xlat(skb, proto);
   2922 	bpf_compute_data_pointers(skb);
   2923 	return ret;
   2924 }
   2925 
   2926 static const struct bpf_func_proto bpf_skb_change_proto_proto = {
   2927 	.func		= bpf_skb_change_proto,
   2928 	.gpl_only	= false,
   2929 	.ret_type	= RET_INTEGER,
   2930 	.arg1_type	= ARG_PTR_TO_CTX,
   2931 	.arg2_type	= ARG_ANYTHING,
   2932 	.arg3_type	= ARG_ANYTHING,
   2933 };
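
/* Sketch of the division of labour described in the comment above, from a
 * tc BPF-C program doing a v4-to-v6 translation (the actual IPv6 header
 * contents are left to the program and omitted here):
 *
 *	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
 *		return TC_ACT_SHOT;
 *	// the helper only resized the header room and flipped
 *	// skb->protocol: the program must now write the complete IPv6
 *	// header via bpf_skb_store_bytes() and fix the L4 checksum for
 *	// the new pseudo header via bpf_l4_csum_replace()
 */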
   2934 
   2935 BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
   2936 {
   2937 	/* We only allow a restricted subset to be changed for now. */
   2938 	if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
   2939 		     !skb_pkt_type_ok(pkt_type)))
   2940 		return -EINVAL;
   2941 
   2942 	skb->pkt_type = pkt_type;
   2943 	return 0;
   2944 }
   2945 
   2946 static const struct bpf_func_proto bpf_skb_change_type_proto = {
   2947 	.func		= bpf_skb_change_type,
   2948 	.gpl_only	= false,
   2949 	.ret_type	= RET_INTEGER,
   2950 	.arg1_type	= ARG_PTR_TO_CTX,
   2951 	.arg2_type	= ARG_ANYTHING,
   2952 };
   2953 
   2954 static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
   2955 {
   2956 	switch (skb->protocol) {
   2957 	case htons(ETH_P_IP):
   2958 		return sizeof(struct iphdr);
   2959 	case htons(ETH_P_IPV6):
   2960 		return sizeof(struct ipv6hdr);
   2961 	default:
   2962 		return ~0U;
   2963 	}
   2964 }
   2965 
   2966 static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
   2967 {
   2968 	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
   2969 	int ret;
   2970 
   2971 	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
   2972 		return -ENOTSUPP;
   2973 
   2974 	ret = skb_cow(skb, len_diff);
   2975 	if (unlikely(ret < 0))
   2976 		return ret;
   2977 
   2978 	ret = bpf_skb_net_hdr_push(skb, off, len_diff);
   2979 	if (unlikely(ret < 0))
   2980 		return ret;
   2981 
   2982 	if (skb_is_gso(skb)) {
   2983 		struct skb_shared_info *shinfo = skb_shinfo(skb);
   2984 
   2985 		/* Due to header grow, MSS needs to be downgraded. */
   2986 		skb_decrease_gso_size(shinfo, len_diff);
   2987 		/* Header must be checked, and gso_segs recomputed. */
   2988 		shinfo->gso_type |= SKB_GSO_DODGY;
   2989 		shinfo->gso_segs = 0;
   2990 	}
   2991 
   2992 	return 0;
   2993 }
   2994 
   2995 static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
   2996 {
   2997 	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
   2998 	int ret;
   2999 
   3000 	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
   3001 		return -ENOTSUPP;
   3002 
   3003 	ret = skb_unclone(skb, GFP_ATOMIC);
   3004 	if (unlikely(ret < 0))
   3005 		return ret;
   3006 
   3007 	ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
   3008 	if (unlikely(ret < 0))
   3009 		return ret;
   3010 
   3011 	if (skb_is_gso(skb)) {
   3012 		struct skb_shared_info *shinfo = skb_shinfo(skb);
   3013 
   3014 		/* Due to header shrink, MSS can be upgraded. */
   3015 		skb_increase_gso_size(shinfo, len_diff);
   3016 		/* Header must be checked, and gso_segs recomputed. */
   3017 		shinfo->gso_type |= SKB_GSO_DODGY;
   3018 		shinfo->gso_segs = 0;
   3019 	}
   3020 
   3021 	return 0;
   3022 }
   3023 
   3024 static u32 __bpf_skb_max_len(const struct sk_buff *skb)
   3025 {
   3026 	return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
   3027 			  SKB_MAX_ALLOC;
   3028 }
   3029 
   3030 static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
   3031 {
   3032 	bool trans_same = skb->transport_header == skb->network_header;
   3033 	u32 len_cur, len_diff_abs = abs(len_diff);
   3034 	u32 len_min = bpf_skb_net_base_len(skb);
   3035 	u32 len_max = __bpf_skb_max_len(skb);
   3036 	__be16 proto = skb->protocol;
   3037 	bool shrink = len_diff < 0;
   3038 	int ret;
   3039 
   3040 	if (unlikely(len_diff_abs > 0xfffU))
   3041 		return -EFAULT;
   3042 	if (unlikely(proto != htons(ETH_P_IP) &&
   3043 		     proto != htons(ETH_P_IPV6)))
   3044 		return -ENOTSUPP;
   3045 
   3046 	len_cur = skb->len - skb_network_offset(skb);
   3047 	if (skb_transport_header_was_set(skb) && !trans_same)
   3048 		len_cur = skb_network_header_len(skb);
   3049 	if ((shrink && (len_diff_abs >= len_cur ||
   3050 			len_cur - len_diff_abs < len_min)) ||
   3051 	    (!shrink && (skb->len + len_diff_abs > len_max &&
   3052 			 !skb_is_gso(skb))))
   3053 		return -ENOTSUPP;
   3054 
   3055 	ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
   3056 		       bpf_skb_net_grow(skb, len_diff_abs);
   3057 
   3058 	bpf_compute_data_pointers(skb);
   3059 	return ret;
   3060 }
   3061 
   3062 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
   3063 	   u32, mode, u64, flags)
   3064 {
   3065 	if (unlikely(flags))
   3066 		return -EINVAL;
   3067 	if (likely(mode == BPF_ADJ_ROOM_NET))
   3068 		return bpf_skb_adjust_net(skb, len_diff);
   3069 
   3070 	return -ENOTSUPP;
   3071 }
   3072 
   3073 static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
   3074 	.func		= bpf_skb_adjust_room,
   3075 	.gpl_only	= false,
   3076 	.ret_type	= RET_INTEGER,
   3077 	.arg1_type	= ARG_PTR_TO_CTX,
   3078 	.arg2_type	= ARG_ANYTHING,
   3079 	.arg3_type	= ARG_ANYTHING,
   3080 	.arg4_type	= ARG_ANYTHING,
   3081 };
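
/* Caller side sketch, assuming a tc BPF-C program (the 8 byte value is
 * arbitrary): grow room at the network layer, e.g. to insert an IPv6
 * extension header, then fill it in and adjust lengths/checksums with the
 * store and csum helpers:
 *
 *	if (bpf_skb_adjust_room(skb, 8, BPF_ADJ_ROOM_NET, 0))
 *		return TC_ACT_SHOT;
 */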
   3082 
   3083 static u32 __bpf_skb_min_len(const struct sk_buff *skb)
   3084 {
   3085 	u32 min_len = skb_network_offset(skb);
   3086 
   3087 	if (skb_transport_header_was_set(skb))
   3088 		min_len = skb_transport_offset(skb);
   3089 	if (skb->ip_summed == CHECKSUM_PARTIAL)
   3090 		min_len = skb_checksum_start_offset(skb) +
   3091 			  skb->csum_offset + sizeof(__sum16);
   3092 	return min_len;
   3093 }
   3094 
   3095 static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
   3096 {
   3097 	unsigned int old_len = skb->len;
   3098 	int ret;
   3099 
   3100 	ret = __skb_grow_rcsum(skb, new_len);
   3101 	if (!ret)
   3102 		memset(skb->data + old_len, 0, new_len - old_len);
   3103 	return ret;
   3104 }
   3105 
   3106 static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
   3107 {
   3108 	return __skb_trim_rcsum(skb, new_len);
   3109 }
   3110 
   3111 static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
   3112 					u64 flags)
   3113 {
   3114 	u32 max_len = __bpf_skb_max_len(skb);
   3115 	u32 min_len = __bpf_skb_min_len(skb);
   3116 	int ret;
   3117 
   3118 	if (unlikely(flags || new_len > max_len || new_len < min_len))
   3119 		return -EINVAL;
   3120 	if (skb->encapsulation)
   3121 		return -ENOTSUPP;
   3122 
   3123 	/* The basic idea of this helper is that it's performing the
   3124 	 * needed work to either grow or trim an skb, and eBPF program
   3125 	 * rewrites the rest via helpers like bpf_skb_store_bytes(),
   3126 	 * bpf_lX_csum_replace() and others rather than passing a raw
   3127 	 * buffer here. This one is a slow path helper and intended
   3128 	 * for replies with control messages.
   3129 	 *
    3130 	 * Like in bpf_skb_change_proto(), we want to keep this rather
    3131 	 * minimal and without protocol specifics so that we are able
    3132 	 * to separate concerns: e.g. bpf_skb_store_bytes() should be
    3133 	 * the only one responsible for writing buffers.
    3134 	 *
    3135 	 * It's really expected to be a slow path operation here for
    3136 	 * control message replies, so by doing this we implicitly
    3137 	 * linearize, unclone and drop offloads from the skb.
   3138 	 */
   3139 	ret = __bpf_try_make_writable(skb, skb->len);
   3140 	if (!ret) {
   3141 		if (new_len > skb->len)
   3142 			ret = bpf_skb_grow_rcsum(skb, new_len);
   3143 		else if (new_len < skb->len)
   3144 			ret = bpf_skb_trim_rcsum(skb, new_len);
   3145 		if (!ret && skb_is_gso(skb))
   3146 			skb_gso_reset(skb);
   3147 	}
   3148 	return ret;
   3149 }
   3150 
   3151 BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
   3152 	   u64, flags)
   3153 {
   3154 	int ret = __bpf_skb_change_tail(skb, new_len, flags);
   3155 
   3156 	bpf_compute_data_pointers(skb);
   3157 	return ret;
   3158 }
   3159 
   3160 static const struct bpf_func_proto bpf_skb_change_tail_proto = {
   3161 	.func		= bpf_skb_change_tail,
   3162 	.gpl_only	= false,
   3163 	.ret_type	= RET_INTEGER,
   3164 	.arg1_type	= ARG_PTR_TO_CTX,
   3165 	.arg2_type	= ARG_ANYTHING,
   3166 	.arg3_type	= ARG_ANYTHING,
   3167 };
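
/* A minimal sketch from a tc BPF-C program: pad runt frames up to the 64
 * byte Ethernet minimum; the grow path above zeroes the added bytes:
 *
 *	if (skb->len < 64)
 *		bpf_skb_change_tail(skb, 64, 0);
 */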
   3168 
   3169 BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
   3170 	   u64, flags)
   3171 {
   3172 	int ret = __bpf_skb_change_tail(skb, new_len, flags);
   3173 
   3174 	bpf_compute_data_end_sk_skb(skb);
   3175 	return ret;
   3176 }
   3177 
   3178 static const struct bpf_func_proto sk_skb_change_tail_proto = {
   3179 	.func		= sk_skb_change_tail,
   3180 	.gpl_only	= false,
   3181 	.ret_type	= RET_INTEGER,
   3182 	.arg1_type	= ARG_PTR_TO_CTX,
   3183 	.arg2_type	= ARG_ANYTHING,
   3184 	.arg3_type	= ARG_ANYTHING,
   3185 };
   3186 
   3187 static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
   3188 					u64 flags)
   3189 {
   3190 	u32 max_len = __bpf_skb_max_len(skb);
   3191 	u32 new_len = skb->len + head_room;
   3192 	int ret;
   3193 
   3194 	if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
   3195 		     new_len < skb->len))
   3196 		return -EINVAL;
   3197 
   3198 	ret = skb_cow(skb, head_room);
   3199 	if (likely(!ret)) {
    3200 		/* The idea for this helper is that we currently only
    3201 		 * allow expanding the mac header. This means that
    3202 		 * skb->protocol, the network header, etc, stay as is.
   3203 		 * Compared to bpf_skb_change_tail(), we're more
   3204 		 * flexible due to not needing to linearize or
   3205 		 * reset GSO. Intention for this helper is to be
   3206 		 * used by an L3 skb that needs to push mac header
   3207 		 * for redirection into L2 device.
   3208 		 */
   3209 		__skb_push(skb, head_room);
   3210 		memset(skb->data, 0, head_room);
   3211 		skb_reset_mac_header(skb);
   3212 	}
   3213 
   3214 	return ret;
   3215 }
   3216 
   3217 BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
   3218 	   u64, flags)
   3219 {
   3220 	int ret = __bpf_skb_change_head(skb, head_room, flags);
   3221 
   3222 	bpf_compute_data_pointers(skb);
   3223 	return ret;
   3224 }
   3225 
   3226 static const struct bpf_func_proto bpf_skb_change_head_proto = {
   3227 	.func		= bpf_skb_change_head,
   3228 	.gpl_only	= false,
   3229 	.ret_type	= RET_INTEGER,
   3230 	.arg1_type	= ARG_PTR_TO_CTX,
   3231 	.arg2_type	= ARG_ANYTHING,
   3232 	.arg3_type	= ARG_ANYTHING,
   3233 };
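
/* Sketch of the intended use from a hypothetical lwt_xmit BPF-C program:
 * give an L3 skb room for a mac header before redirecting it into an L2
 * device (the Ethernet header contents are illustrative and incomplete):
 *
 *	struct ethhdr eth = { .h_proto = bpf_htons(ETH_P_IP) };
 *
 *	if (bpf_skb_change_head(skb, ETH_HLEN, 0))
 *		return BPF_DROP;
 *	// the pushed bytes are zeroed; fill in the Ethernet header and
 *	// then hand the skb to the target device with bpf_redirect()
 *	bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0);
 */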
   3234 
   3235 BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
   3236 	   u64, flags)
   3237 {
   3238 	int ret = __bpf_skb_change_head(skb, head_room, flags);
   3239 
   3240 	bpf_compute_data_end_sk_skb(skb);
   3241 	return ret;
   3242 }
   3243 
   3244 static const struct bpf_func_proto sk_skb_change_head_proto = {
   3245 	.func		= sk_skb_change_head,
   3246 	.gpl_only	= false,
   3247 	.ret_type	= RET_INTEGER,
   3248 	.arg1_type	= ARG_PTR_TO_CTX,
   3249 	.arg2_type	= ARG_ANYTHING,
   3250 	.arg3_type	= ARG_ANYTHING,
   3251 };
   3252 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
   3253 {
   3254 	return xdp_data_meta_unsupported(xdp) ? 0 :
   3255 	       xdp->data - xdp->data_meta;
   3256 }
   3257 
   3258 BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
   3259 {
   3260 	void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
   3261 	unsigned long metalen = xdp_get_metalen(xdp);
   3262 	void *data_start = xdp_frame_end + metalen;
   3263 	void *data = xdp->data + offset;
   3264 
   3265 	if (unlikely(data < data_start ||
   3266 		     data > xdp->data_end - ETH_HLEN))
   3267 		return -EINVAL;
   3268 
   3269 	if (metalen)
   3270 		memmove(xdp->data_meta + offset,
   3271 			xdp->data_meta, metalen);
   3272 	xdp->data_meta += offset;
   3273 	xdp->data = data;
   3274 
   3275 	return 0;
   3276 }
   3277 
   3278 static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
   3279 	.func		= bpf_xdp_adjust_head,
   3280 	.gpl_only	= false,
   3281 	.ret_type	= RET_INTEGER,
   3282 	.arg1_type	= ARG_PTR_TO_CTX,
   3283 	.arg2_type	= ARG_ANYTHING,
   3284 };
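
/* Caller side sketch from an XDP BPF-C program: move xdp->data back into the
 * headroom to prepend an outer Ethernet header (a negative delta adds bytes
 * at the front of the frame, a positive one removes them):
 *
 *	if (bpf_xdp_adjust_head(xdp, -(int)sizeof(struct ethhdr)))
 *		return XDP_DROP;
 *	// every packet pointer derived before the call is stale now
 *	void *data     = (void *)(long)xdp->data;
 *	void *data_end = (void *)(long)xdp->data_end;
 *	if (data + sizeof(struct ethhdr) > data_end)
 *		return XDP_DROP;
 */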
   3285 
   3286 BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
   3287 {
   3288 	void *data_end = xdp->data_end + offset;
   3289 
   3290 	/* only shrinking is allowed for now. */
   3291 	if (unlikely(offset >= 0))
   3292 		return -EINVAL;
   3293 
   3294 	if (unlikely(data_end < xdp->data + ETH_HLEN))
   3295 		return -EINVAL;
   3296 
   3297 	xdp->data_end = data_end;
   3298 
   3299 	return 0;
   3300 }
   3301 
   3302 static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
   3303 	.func		= bpf_xdp_adjust_tail,
   3304 	.gpl_only	= false,
   3305 	.ret_type	= RET_INTEGER,
   3306 	.arg1_type	= ARG_PTR_TO_CTX,
   3307 	.arg2_type	= ARG_ANYTHING,
   3308 };
   3309 
   3310 BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
   3311 {
   3312 	void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
   3313 	void *meta = xdp->data_meta + offset;
   3314 	unsigned long metalen = xdp->data - meta;
   3315 
   3316 	if (xdp_data_meta_unsupported(xdp))
   3317 		return -ENOTSUPP;
   3318 	if (unlikely(meta < xdp_frame_end ||
   3319 		     meta > xdp->data))
   3320 		return -EINVAL;
   3321 	if (unlikely((metalen & (sizeof(__u32) - 1)) ||
   3322 		     (metalen > 32)))
   3323 		return -EACCES;
   3324 
   3325 	xdp->data_meta = meta;
   3326 
   3327 	return 0;
   3328 }
   3329 
   3330 static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
   3331 	.func		= bpf_xdp_adjust_meta,
   3332 	.gpl_only	= false,
   3333 	.ret_type	= RET_INTEGER,
   3334 	.arg1_type	= ARG_PTR_TO_CTX,
   3335 	.arg2_type	= ARG_ANYTHING,
   3336 };
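
/* Sketch of stashing metadata in front of the frame from an XDP BPF-C
 * program; a later tc program can read it back via the skb metadata area.
 * The 32-bit value is arbitrary:
 *
 *	__u32 *meta;
 *
 *	if (bpf_xdp_adjust_meta(xdp, -(int)sizeof(*meta)))
 *		return XDP_PASS;
 *	meta = (void *)(long)xdp->data_meta;
 *	if ((void *)(meta + 1) > (void *)(long)xdp->data)
 *		return XDP_PASS;
 *	*meta = 0xcafe;
 */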
   3337 
   3338 static int __bpf_tx_xdp(struct net_device *dev,
   3339 			struct bpf_map *map,
   3340 			struct xdp_buff *xdp,
   3341 			u32 index)
   3342 {
   3343 	struct xdp_frame *xdpf;
   3344 	int err, sent;
   3345 
   3346 	if (!dev->netdev_ops->ndo_xdp_xmit) {
   3347 		return -EOPNOTSUPP;
   3348 	}
   3349 
   3350 	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
   3351 	if (unlikely(err))
   3352 		return err;
   3353 
   3354 	xdpf = convert_to_xdp_frame(xdp);
   3355 	if (unlikely(!xdpf))
   3356 		return -EOVERFLOW;
   3357 
   3358 	sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
   3359 	if (sent <= 0)
   3360 		return sent;
   3361 	return 0;
   3362 }
   3363 
   3364 static noinline int
   3365 xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp,
   3366 		     struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri)
   3367 {
   3368 	struct net_device *fwd;
   3369 	u32 index = ri->ifindex;
   3370 	int err;
   3371 
   3372 	fwd = dev_get_by_index_rcu(dev_net(dev), index);
   3373 	ri->ifindex = 0;
   3374 	if (unlikely(!fwd)) {
   3375 		err = -EINVAL;
   3376 		goto err;
   3377 	}
   3378 
   3379 	err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
   3380 	if (unlikely(err))
   3381 		goto err;
   3382 
   3383 	_trace_xdp_redirect(dev, xdp_prog, index);
   3384 	return 0;
   3385 err:
   3386 	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
   3387 	return err;
   3388 }
   3389 
   3390 static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
   3391 			    struct bpf_map *map,
   3392 			    struct xdp_buff *xdp,
   3393 			    u32 index)
   3394 {
   3395 	int err;
   3396 
   3397 	switch (map->map_type) {
   3398 	case BPF_MAP_TYPE_DEVMAP: {
   3399 		struct bpf_dtab_netdev *dst = fwd;
   3400 
   3401 		err = dev_map_enqueue(dst, xdp, dev_rx);
   3402 		if (unlikely(err))
   3403 			return err;
   3404 		__dev_map_insert_ctx(map, index);
   3405 		break;
   3406 	}
   3407 	case BPF_MAP_TYPE_CPUMAP: {
   3408 		struct bpf_cpu_map_entry *rcpu = fwd;
   3409 
   3410 		err = cpu_map_enqueue(rcpu, xdp, dev_rx);
   3411 		if (unlikely(err))
   3412 			return err;
   3413 		__cpu_map_insert_ctx(map, index);
   3414 		break;
   3415 	}
   3416 	case BPF_MAP_TYPE_XSKMAP: {
   3417 		struct xdp_sock *xs = fwd;
   3418 
   3419 		err = __xsk_map_redirect(map, xdp, xs);
   3420 		return err;
   3421 	}
   3422 	default:
   3423 		break;
   3424 	}
   3425 	return 0;
   3426 }
   3427 
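/* Redirects through a map are batched per CPU: the enqueue operations above
 * only record the destination, and xdp_do_flush_map() kicks the actual
 * transmission for whichever map type was last used on this CPU. Drivers
 * call this at the end of their NAPI poll loop once the XDP program has run
 * over the received batch.
 */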
   3428 void xdp_do_flush_map(void)
   3429 {
   3430 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
   3431 	struct bpf_map *map = ri->map_to_flush;
   3432 
   3433 	ri->map_to_flush = NULL;
   3434 	if (map) {
   3435 		switch (map->map_type) {
   3436 		case BPF_MAP_TYPE_DEVMAP:
   3437 			__dev_map_flush(map);
   3438 			break;
   3439 		case BPF_MAP_TYPE_CPUMAP:
   3440 			__cpu_map_flush(map);
   3441 			break;
   3442 		case BPF_MAP_TYPE_XSKMAP:
   3443 			__xsk_map_flush(map);
   3444 			break;
   3445 		default:
   3446 			break;
   3447 		}
   3448 	}
   3449 }
   3450 EXPORT_SYMBOL_GPL(xdp_do_flush_map);
   3451 
   3452 static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
   3453 {
   3454 	switch (map->map_type) {
   3455 	case BPF_MAP_TYPE_DEVMAP:
   3456 		return __dev_map_lookup_elem(map, index);
   3457 	case BPF_MAP_TYPE_CPUMAP:
   3458 		return __cpu_map_lookup_elem(map, index);
   3459 	case BPF_MAP_TYPE_XSKMAP:
   3460 		return __xsk_map_lookup_elem(map, index);
   3461 	default:
   3462 		return NULL;
   3463 	}
   3464 }
   3465 
   3466 void bpf_clear_redirect_map(struct bpf_map *map)
   3467 {
   3468 	struct bpf_redirect_info *ri;
   3469 	int cpu;
   3470 
   3471 	for_each_possible_cpu(cpu) {
   3472 		ri = per_cpu_ptr(&bpf_redirect_info, cpu);
    3473 		/* Avoid polluting the remote cacheline with writes
    3474 		 * unless needed. Once we pass this test, we need the
    3475 		 * cmpxchg() to make sure the value hasn't been changed
    3476 		 * in the meantime by a remote CPU.
    3477 		 */
   3478 		if (unlikely(READ_ONCE(ri->map) == map))
   3479 			cmpxchg(&ri->map, map, NULL);
   3480 	}
   3481 }
   3482 
   3483 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
   3484 			       struct bpf_prog *xdp_prog, struct bpf_map *map,
   3485 			       struct bpf_redirect_info *ri)
   3486 {
   3487 	u32 index = ri->ifindex;
   3488 	void *fwd = NULL;
   3489 	int err;
   3490 
   3491 	ri->ifindex = 0;
   3492 	WRITE_ONCE(ri->map, NULL);
   3493 
   3494 	fwd = __xdp_map_lookup_elem(map, index);
   3495 	if (unlikely(!fwd)) {
   3496 		err = -EINVAL;
   3497 		goto err;
   3498 	}
   3499 	if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
   3500 		xdp_do_flush_map();
   3501 
   3502 	err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
   3503 	if (unlikely(err))
   3504 		goto err;
   3505 
   3506 	ri->map_to_flush = map;
   3507 	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
   3508 	return 0;
   3509 err:
   3510 	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
   3511 	return err;
   3512 }
   3513 
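/* Entry point used by drivers when an XDP program returned XDP_REDIRECT.
 * The program has already stashed its target (an ifindex, or a map plus
 * index) in the per-CPU bpf_redirect_info via bpf_redirect() or
 * bpf_redirect_map(); this function consumes that state and forwards the
 * frame, preferring the map-based fast path when a map was used.
 */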
   3514 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
   3515 		    struct bpf_prog *xdp_prog)
   3516 {
   3517 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
   3518 	struct bpf_map *map = READ_ONCE(ri->map);
   3519 
   3520 	if (likely(map))
   3521 		return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri);
   3522 
   3523 	return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri);
   3524 }
   3525 EXPORT_SYMBOL_GPL(xdp_do_redirect);
   3526 
   3527 static int xdp_do_generic_redirect_map(struct net_device *dev,
   3528 				       struct sk_buff *skb,
   3529 				       struct xdp_buff *xdp,
   3530 				       struct bpf_prog *xdp_prog,
   3531 				       struct bpf_map *map)
   3532 {
   3533 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
   3534 	u32 index = ri->ifindex;
   3535 	void *fwd = NULL;
   3536 	int err = 0;
   3537 
   3538 	ri->ifindex = 0;
   3539 	WRITE_ONCE(ri->map, NULL);
   3540 
   3541 	fwd = __xdp_map_lookup_elem(map, index);
   3542 	if (unlikely(!fwd)) {
   3543 		err = -EINVAL;
   3544 		goto err;
   3545 	}
   3546 
   3547 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
   3548 		struct bpf_dtab_netdev *dst = fwd;
   3549 
   3550 		err = dev_map_generic_redirect(dst, skb, xdp_prog);
   3551 		if (unlikely(err))
   3552 			goto err;
   3553 	} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
   3554 		struct xdp_sock *xs = fwd;
   3555 
   3556 		err = xsk_generic_rcv(xs, xdp);
   3557 		if (err)
   3558 			goto err;
   3559 		consume_skb(skb);
   3560 	} else {
   3561 		/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
   3562 		err = -EBADRQC;
   3563 		goto err;
   3564 	}
   3565 
   3566 	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
   3567 	return 0;
   3568 err:
   3569 	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
   3570 	return err;
   3571 }
   3572 
   3573 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
   3574 			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
   3575 {
   3576 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
   3577 	struct bpf_map *map = READ_ONCE(ri->map);
   3578 	u32 index = ri->ifindex;
   3579 	struct net_device *fwd;
   3580 	int err = 0;
   3581 
   3582 	if (map)
   3583 		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
   3584 						   map);
   3585 	ri->ifindex = 0;
   3586 	fwd = dev_get_by_index_rcu(dev_net(dev), index);
   3587 	if (unlikely(!fwd)) {
   3588 		err = -EINVAL;
   3589 		goto err;
   3590 	}
   3591 
   3592 	err = xdp_ok_fwd_dev(fwd, skb->len);
   3593 	if (unlikely(err))
   3594 		goto err;
   3595 
   3596 	skb->dev = fwd;
   3597 	_trace_xdp_redirect(dev, xdp_prog, index);
   3598 	generic_xdp_tx(skb, xdp_prog);
   3599 	return 0;
   3600 err:
   3601 	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
   3602 	return err;
   3603 }
   3604 EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
   3605 
   3606 BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
   3607 {
   3608 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
   3609 
   3610 	if (unlikely(flags))
   3611 		return XDP_ABORTED;
   3612 
   3613 	ri->ifindex = ifindex;
   3614 	ri->flags = flags;
   3615 	WRITE_ONCE(ri->map, NULL);
   3616 
   3617 	return XDP_REDIRECT;
   3618 }
   3619 
   3620 static const struct bpf_func_proto bpf_xdp_redirect_proto = {
   3621 	.func           = bpf_xdp_redirect,
   3622 	.gpl_only       = false,
   3623 	.ret_type       = RET_INTEGER,
   3624 	.arg1_type      = ARG_ANYTHING,
   3625 	.arg2_type      = ARG_ANYTHING,
   3626 };
   3627 
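/* Kernel side of the bpf_redirect_map() helper: records the target map and
 * key in the per-CPU bpf_redirect_info and returns XDP_REDIRECT, which the
 * driver then resolves via xdp_do_redirect() above. In this version @flags
 * must be zero.
 *
 * Illustrative XDP usage (a sketch only; the map definition follows the
 * classic libbpf bpf_map_def convention and is not part of this file):
 *
 *	struct bpf_map_def SEC("maps") tx_port = {
 *		.type = BPF_MAP_TYPE_DEVMAP,
 *		.key_size = sizeof(int),
 *		.value_size = sizeof(int),
 *		.max_entries = 64,
 *	};
 *
 *	SEC("xdp")
 *	int xdp_redirect_prog(struct xdp_md *ctx)
 *	{
 *		return bpf_redirect_map(&tx_port, 0, 0);
 *	}
 */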
   3628 BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
   3629 	   u64, flags)
   3630 {
   3631 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
   3632 
   3633 	if (unlikely(flags))
   3634 		return XDP_ABORTED;
   3635 
   3636 	ri->ifindex = ifindex;
   3637 	ri->flags = flags;
   3638 	WRITE_ONCE(ri->map, map);
   3639 
   3640 	return XDP_REDIRECT;
   3641 }
   3642 
   3643 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
   3644 	.func           = bpf_xdp_redirect_map,
   3645 	.gpl_only       = false,
   3646 	.ret_type       = RET_INTEGER,
   3647 	.arg1_type      = ARG_CONST_MAP_PTR,
   3648 	.arg2_type      = ARG_ANYTHING,
   3649 	.arg3_type      = ARG_ANYTHING,
   3650 };
   3651 
   3652 static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
   3653 				  unsigned long off, unsigned long len)
   3654 {
   3655 	void *ptr = skb_header_pointer(skb, off, len, dst_buff);
   3656 
   3657 	if (unlikely(!ptr))
   3658 		return len;
   3659 	if (ptr != dst_buff)
   3660 		memcpy(dst_buff, ptr, len);
   3661 
   3662 	return 0;
   3663 }
   3664 
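/* Kernel side of bpf_perf_event_output() for skb contexts: emits @meta,
 * optionally followed by the number of packet bytes encoded in the upper
 * 32 bits of @flags (BPF_F_CTXLEN_MASK), into a
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY that user space reads via the perf ring
 * buffer. bpf_skb_copy() above is the callback that linearizes the
 * requested packet bytes into the sample.
 */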
   3665 BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
   3666 	   u64, flags, void *, meta, u64, meta_size)
   3667 {
   3668 	u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
   3669 
   3670 	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
   3671 		return -EINVAL;
   3672 	if (unlikely(skb_size > skb->len))
   3673 		return -EFAULT;
   3674 
   3675 	return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
   3676 				bpf_skb_copy);
   3677 }
   3678 
   3679 static const struct bpf_func_proto bpf_skb_event_output_proto = {
   3680 	.func		= bpf_skb_event_output,
   3681 	.gpl_only	= true,
   3682 	.ret_type	= RET_INTEGER,
   3683 	.arg1_type	= ARG_PTR_TO_CTX,
   3684 	.arg2_type	= ARG_CONST_MAP_PTR,
   3685 	.arg3_type	= ARG_ANYTHING,
   3686 	.arg4_type	= ARG_PTR_TO_MEM,
   3687 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
   3688 };
   3689 
   3690 static unsigned short bpf_tunnel_key_af(u64 flags)
   3691 {
   3692 	return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
   3693 }
   3694 
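/* Kernel side of the bpf_skb_get_tunnel_key() helper: copies the receive
 * tunnel metadata (collect_md mode) attached to the skb into the caller's
 * struct bpf_tunnel_key. Older, shorter layouts of that struct are still
 * accepted and fixed up through the on-stack compat buffer so existing
 * programs keep working. BPF_F_TUNINFO_IPV6 selects the IPv6 address
 * fields; without it the IPv4 fields are filled in.
 */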
   3695 BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
   3696 	   u32, size, u64, flags)
   3697 {
   3698 	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
   3699 	u8 compat[sizeof(struct bpf_tunnel_key)];
   3700 	void *to_orig = to;
   3701 	int err;
   3702 
   3703 	if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
   3704 		err = -EINVAL;
   3705 		goto err_clear;
   3706 	}
   3707 	if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
   3708 		err = -EPROTO;
   3709 		goto err_clear;
   3710 	}
   3711 	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
   3712 		err = -EINVAL;
   3713 		switch (size) {
   3714 		case offsetof(struct bpf_tunnel_key, tunnel_label):
   3715 		case offsetof(struct bpf_tunnel_key, tunnel_ext):
   3716 			goto set_compat;
   3717 		case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
    3718 			/* Fix up deprecated structure layouts here, so we have
    3719 			 * a common path later on.
    3720 			 */
   3721 			if (ip_tunnel_info_af(info) != AF_INET)
   3722 				goto err_clear;
   3723 set_compat:
   3724 			to = (struct bpf_tunnel_key *)compat;
   3725 			break;
   3726 		default:
   3727 			goto err_clear;
   3728 		}
   3729 	}
   3730 
   3731 	to->tunnel_id = be64_to_cpu(info->key.tun_id);
   3732 	to->tunnel_tos = info->key.tos;
   3733 	to->tunnel_ttl = info->key.ttl;
   3734 	to->tunnel_ext = 0;
   3735 
   3736 	if (flags & BPF_F_TUNINFO_IPV6) {
   3737 		memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
   3738 		       sizeof(to->remote_ipv6));
   3739 		to->tunnel_label = be32_to_cpu(info->key.label);
   3740 	} else {
   3741 		to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
   3742 		memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
   3743 		to->tunnel_label = 0;
   3744 	}
   3745 
   3746 	if (unlikely(size != sizeof(struct bpf_tunnel_key)))
   3747 		memcpy(to_orig, to, size);
   3748 
   3749 	return 0;
   3750 err_clear:
   3751 	memset(to_orig, 0, size);
   3752 	return err;
   3753 }
   3754 
   3755 static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
   3756 	.func		= bpf_skb_get_tunnel_key,
   3757 	.gpl_only	= false,
   3758 	.ret_type	= RET_INTEGER,
   3759 	.arg1_type	= ARG_PTR_TO_CTX,
   3760 	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
   3761 	.arg3_type	= ARG_CONST_SIZE,
   3762 	.arg4_type	= ARG_ANYTHING,
   3763 };
   3764 
   3765 BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
   3766 {
   3767 	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
   3768 	int err;
   3769 
   3770 	if (unlikely(!info ||
   3771 		     !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
   3772 		err = -ENOENT;
   3773 		goto err_clear;
   3774 	}
   3775 	if (unlikely(size < info->options_len)) {
   3776 		err = -ENOMEM;
   3777 		goto err_clear;
   3778 	}
   3779 
   3780 	ip_tunnel_info_opts_get(to, info);
   3781 	if (size > info->options_len)
   3782 		memset(to + info->options_len, 0, size - info->options_len);
   3783 
   3784 	return info->options_len;
   3785 err_clear:
   3786 	memset(to, 0, size);
   3787 	return err;
   3788 }
   3789 
   3790 static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
   3791 	.func		= bpf_skb_get_tunnel_opt,
   3792 	.gpl_only	= false,
   3793 	.ret_type	= RET_INTEGER,
   3794 	.arg1_type	= ARG_PTR_TO_CTX,
   3795 	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
   3796 	.arg3_type	= ARG_CONST_SIZE,
   3797 };
   3798 
   3799 static struct metadata_dst __percpu *md_dst;
   3800 
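/* Kernel side of the bpf_skb_set_tunnel_key() helper: attaches a per-CPU
 * metadata_dst to the skb and fills its ip_tunnel_info from the caller's
 * struct bpf_tunnel_key, so that a subsequent transmit through a
 * collect_md tunnel device (vxlan, geneve, gre, ...) uses those
 * parameters. The per-CPU md_dst above is allocated lazily in
 * bpf_get_skb_set_tunnel_proto() the first time a program uses one of the
 * set_tunnel helpers.
 */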
   3801 BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
   3802 	   const struct bpf_tunnel_key *, from, u32, size, u64, flags)
   3803 {
   3804 	struct metadata_dst *md = this_cpu_ptr(md_dst);
   3805 	u8 compat[sizeof(struct bpf_tunnel_key)];
   3806 	struct ip_tunnel_info *info;
   3807 
   3808 	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
   3809 			       BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
   3810 		return -EINVAL;
   3811 	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
   3812 		switch (size) {
   3813 		case offsetof(struct bpf_tunnel_key, tunnel_label):
   3814 		case offsetof(struct bpf_tunnel_key, tunnel_ext):
   3815 		case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
    3816 			/* Fix up deprecated structure layouts here, so we have
    3817 			 * a common path later on.
    3818 			 */
   3819 			memcpy(compat, from, size);
   3820 			memset(compat + size, 0, sizeof(compat) - size);
   3821 			from = (const struct bpf_tunnel_key *) compat;
   3822 			break;
   3823 		default:
   3824 			return -EINVAL;
   3825 		}
   3826 	}
   3827 	if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
   3828 		     from->tunnel_ext))
   3829 		return -EINVAL;
   3830 
   3831 	skb_dst_drop(skb);
   3832 	dst_hold((struct dst_entry *) md);
   3833 	skb_dst_set(skb, (struct dst_entry *) md);
   3834 
   3835 	info = &md->u.tun_info;
   3836 	memset(info, 0, sizeof(*info));
   3837 	info->mode = IP_TUNNEL_INFO_TX;
   3838 
   3839 	info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
   3840 	if (flags & BPF_F_DONT_FRAGMENT)
   3841 		info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
   3842 	if (flags & BPF_F_ZERO_CSUM_TX)
   3843 		info->key.tun_flags &= ~TUNNEL_CSUM;
   3844 	if (flags & BPF_F_SEQ_NUMBER)
   3845 		info->key.tun_flags |= TUNNEL_SEQ;
   3846 
   3847 	info->key.tun_id = cpu_to_be64(from->tunnel_id);
   3848 	info->key.tos = from->tunnel_tos;
   3849 	info->key.ttl = from->tunnel_ttl;
   3850 
   3851 	if (flags & BPF_F_TUNINFO_IPV6) {
   3852 		info->mode |= IP_TUNNEL_INFO_IPV6;
   3853 		memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
   3854 		       sizeof(from->remote_ipv6));
   3855 		info->key.label = cpu_to_be32(from->tunnel_label) &
   3856 				  IPV6_FLOWLABEL_MASK;
   3857 	} else {
   3858 		info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
   3859 	}
   3860 
   3861 	return 0;
   3862 }
   3863 
   3864 static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
   3865 	.func		= bpf_skb_set_tunnel_key,
   3866 	.gpl_only	= false,
   3867 	.ret_type	= RET_INTEGER,
   3868 	.arg1_type	= ARG_PTR_TO_CTX,
   3869 	.arg2_type	= ARG_PTR_TO_MEM,
   3870 	.arg3_type	= ARG_CONST_SIZE,
   3871 	.arg4_type	= ARG_ANYTHING,
   3872 };
   3873 
   3874 BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
   3875 	   const u8 *, from, u32, size)
   3876 {
   3877 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
   3878 	const struct metadata_dst *md = this_cpu_ptr(md_dst);
   3879 
   3880 	if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
   3881 		return -EINVAL;
   3882 	if (unlikely(size > IP_TUNNEL_OPTS_MAX))
   3883 		return -ENOMEM;
   3884 
   3885 	ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
   3886 
   3887 	return 0;
   3888 }
   3889 
   3890 static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
   3891 	.func		= bpf_skb_set_tunnel_opt,
   3892 	.gpl_only	= false,
   3893 	.ret_type	= RET_INTEGER,
   3894 	.arg1_type	= ARG_PTR_TO_CTX,
   3895 	.arg2_type	= ARG_PTR_TO_MEM,
   3896 	.arg3_type	= ARG_CONST_SIZE,
   3897 };
   3898 
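/* Return the proto for the requested set_tunnel helper, allocating the
 * shared per-CPU md_dst on first use. The cmpxchg() lets concurrent
 * first-time loads race safely: whichever caller loses the race frees its
 * own allocation and uses the winner's.
 */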
   3899 static const struct bpf_func_proto *
   3900 bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
   3901 {
   3902 	if (!md_dst) {
   3903 		struct metadata_dst __percpu *tmp;
   3904 
   3905 		tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
   3906 						METADATA_IP_TUNNEL,
   3907 						GFP_KERNEL);
   3908 		if (!tmp)
   3909 			return NULL;
   3910 		if (cmpxchg(&md_dst, NULL, tmp))
   3911 			metadata_dst_free_percpu(tmp);
   3912 	}
   3913 
   3914 	switch (which) {
   3915 	case BPF_FUNC_skb_set_tunnel_key:
   3916 		return &bpf_skb_set_tunnel_key_proto;
   3917 	case BPF_FUNC_skb_set_tunnel_opt:
   3918 		return &bpf_skb_set_tunnel_opt_proto;
   3919 	default:
   3920 		return NULL;
   3921 	}
   3922 }
   3923 
   3924 BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
   3925 	   u32, idx)
   3926 {
   3927 	struct bpf_array *array = container_of(map, struct bpf_array, map);
   3928 	struct cgroup *cgrp;
   3929 	struct sock *sk;
   3930 
   3931 	sk = skb_to_full_sk(skb);
   3932 	if (!sk || !sk_fullsock(sk))
   3933 		return -ENOENT;
   3934 	if (unlikely(idx >= array->map.max_entries))
   3935 		return -E2BIG;
   3936 
   3937 	cgrp = READ_ONCE(array->ptrs[idx]);
   3938 	if (unlikely(!cgrp))
   3939 		return -EAGAIN;
   3940 
   3941 	return sk_under_cgroup_hierarchy(sk, cgrp);
   3942 }
   3943 
   3944 static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
   3945 	.func		= bpf_skb_under_cgroup,
   3946 	.gpl_only	= false,
   3947 	.ret_type	= RET_INTEGER,
   3948 	.arg1_type	= ARG_PTR_TO_CTX,
   3949 	.arg2_type	= ARG_CONST_MAP_PTR,
   3950 	.arg3_type	= ARG_ANYTHING,
   3951 };
   3952 
   3953 #ifdef CONFIG_SOCK_CGROUP_DATA
   3954 BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
   3955 {
   3956 	struct sock *sk = skb_to_full_sk(skb);
   3957 	struct cgroup *cgrp;
   3958 
   3959 	if (!sk || !sk_fullsock(sk))
   3960 		return 0;
   3961 
   3962 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
   3963 	return cgrp->kn->id.id;
   3964 }
   3965 
   3966 static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
   3967 	.func           = bpf_skb_cgroup_id,
   3968 	.gpl_only       = false,
   3969 	.ret_type       = RET_INTEGER,
   3970 	.arg1_type      = ARG_PTR_TO_CTX,
   3971 };
   3972 
   3973 BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
   3974 	   ancestor_level)
   3975 {
   3976 	struct sock *sk = skb_to_full_sk(skb);
   3977 	struct cgroup *ancestor;
   3978 	struct cgroup *cgrp;
   3979 
   3980 	if (!sk || !sk_fullsock(sk))
   3981 		return 0;
   3982 
   3983 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
   3984 	ancestor = cgroup_ancestor(cgrp, ancestor_level);
   3985 	if (!ancestor)
   3986 		return 0;
   3987 
   3988 	return ancestor->kn->id.id;
   3989 }
   3990 
   3991 static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
   3992 	.func           = bpf_skb_ancestor_cgroup_id,
   3993 	.gpl_only       = false,
   3994 	.ret_type       = RET_INTEGER,
   3995 	.arg1_type      = ARG_PTR_TO_CTX,
   3996 	.arg2_type      = ARG_ANYTHING,
   3997 };
   3998 #endif
   3999 
   4000 static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
   4001 				  unsigned long off, unsigned long len)
   4002 {
   4003 	memcpy(dst_buff, src_buff + off, len);
   4004 	return 0;
   4005 }
   4006 
   4007 BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
   4008 	   u64, flags, void *, meta, u64, meta_size)
   4009 {
   4010 	u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
   4011 
   4012 	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
   4013 		return -EINVAL;
   4014 	if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
   4015 		return -EFAULT;
   4016 
   4017 	return bpf_event_output(map, flags, meta, meta_size, xdp->data,
   4018 				xdp_size, bpf_xdp_copy);
   4019 }
   4020 
   4021 static const struct bpf_func_proto bpf_xdp_event_output_proto = {
   4022 	.func		= bpf_xdp_event_output,
   4023 	.gpl_only	= true,
   4024 	.ret_type	= RET_INTEGER,
   4025 	.arg1_type	= ARG_PTR_TO_CTX,
   4026 	.arg2_type	= ARG_CONST_MAP_PTR,
   4027 	.arg3_type	= ARG_ANYTHING,
   4028 	.arg4_type	= ARG_PTR_TO_MEM,
   4029 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
   4030 };
   4031 
   4032 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
   4033 {
   4034 	return skb->sk ? sock_gen_cookie(skb->sk) : 0;
   4035 }
   4036 
   4037 static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
   4038 	.func           = bpf_get_socket_cookie,
   4039 	.gpl_only       = false,
   4040 	.ret_type       = RET_INTEGER,
   4041 	.arg1_type      = ARG_PTR_TO_CTX,
   4042 };
   4043 
   4044 BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
   4045 {
   4046 	return sock_gen_cookie(ctx->sk);
   4047 }
   4048 
   4049 static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
   4050 	.func		= bpf_get_socket_cookie_sock_addr,
   4051 	.gpl_only	= false,
   4052 	.ret_type	= RET_INTEGER,
   4053 	.arg1_type	= ARG_PTR_TO_CTX,
   4054 };
   4055 
   4056 BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
   4057 {
   4058 	return sock_gen_cookie(ctx->sk);
   4059 }
   4060 
   4061 static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
   4062 	.func		= bpf_get_socket_cookie_sock_ops,
   4063 	.gpl_only	= false,
   4064 	.ret_type	= RET_INTEGER,
   4065 	.arg1_type	= ARG_PTR_TO_CTX,
   4066 };
   4067 
   4068 BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
   4069 {
   4070 	struct sock *sk = sk_to_full_sk(skb->sk);
   4071 	kuid_t kuid;
   4072 
   4073 	if (!sk || !sk_fullsock(sk))
   4074 		return overflowuid;
   4075 	kuid = sock_net_uid(sock_net(sk), sk);
   4076 	return from_kuid_munged(sock_net(sk)->user_ns, kuid);
   4077 }
   4078 
   4079 static const struct bpf_func_proto bpf_get_socket_uid_proto = {
   4080 	.func           = bpf_get_socket_uid,
   4081 	.gpl_only       = false,
   4082 	.ret_type       = RET_INTEGER,
   4083 	.arg1_type      = ARG_PTR_TO_CTX,
   4084 };
   4085 
   4086 BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock,
   4087 	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
   4088 {
   4089 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
   4090 		return -EINVAL;
   4091 
   4092 	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
   4093 }
   4094 
   4095 static const struct bpf_func_proto bpf_sockopt_event_output_proto =  {
   4096 	.func		= bpf_sockopt_event_output,
   4097 	.gpl_only       = true,
   4098 	.ret_type       = RET_INTEGER,
   4099 	.arg1_type      = ARG_PTR_TO_CTX,
   4100 	.arg2_type      = ARG_CONST_MAP_PTR,
   4101 	.arg3_type      = ARG_ANYTHING,
   4102 	.arg4_type      = ARG_PTR_TO_MEM,
   4103 	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
   4104 };
   4105 
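/* Kernel side of the bpf_setsockopt() helper: a restricted setsockopt()
 * usable from BPF_PROG_TYPE_SOCK_OPS programs. Only the SOL_SOCKET,
 * SOL_IP, SOL_IPV6 and SOL_TCP cases handled below are supported;
 * everything else returns -EINVAL.
 *
 * Illustrative sockops usage (a sketch only, not code from this file):
 *
 *	int bufsize = 1 << 20;
 *	bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF,
 *		       &bufsize, sizeof(bufsize));
 */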
   4106 BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
   4107 	   int, level, int, optname, char *, optval, int, optlen)
   4108 {
   4109 	struct sock *sk = bpf_sock->sk;
   4110 	int ret = 0;
   4111 	int val;
   4112 
   4113 	if (!sk_fullsock(sk))
   4114 		return -EINVAL;
   4115 
   4116 	if (level == SOL_SOCKET) {
   4117 		if (optlen != sizeof(int))
   4118 			return -EINVAL;
   4119 		val = *((int *)optval);
   4120 
    4121 		/* Only some socket-level options are supported */
   4122 		switch (optname) {
   4123 		case SO_RCVBUF:
   4124 			val = min_t(u32, val, sysctl_rmem_max);
   4125 			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
   4126 			sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
   4127 			break;
   4128 		case SO_SNDBUF:
   4129 			val = min_t(u32, val, sysctl_wmem_max);
   4130 			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
   4131 			sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
   4132 			break;
   4133 		case SO_MAX_PACING_RATE: /* 32bit version */
   4134 			if (val != ~0U)
   4135 				cmpxchg(&sk->sk_pacing_status,
   4136 					SK_PACING_NONE,
   4137 					SK_PACING_NEEDED);
   4138 			sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
   4139 			sk->sk_pacing_rate = min(sk->sk_pacing_rate,
   4140 						 sk->sk_max_pacing_rate);
   4141 			break;
   4142 		case SO_PRIORITY:
   4143 			sk->sk_priority = val;
   4144 			break;
   4145 		case SO_RCVLOWAT:
   4146 			if (val < 0)
   4147 				val = INT_MAX;
   4148 			sk->sk_rcvlowat = val ? : 1;
   4149 			break;
   4150 		case SO_MARK:
   4151 			if (sk->sk_mark != val) {
   4152 				sk->sk_mark = val;
   4153 				sk_dst_reset(sk);
   4154 			}
   4155 			break;
   4156 		default:
   4157 			ret = -EINVAL;
   4158 		}
   4159 #ifdef CONFIG_INET
   4160 	} else if (level == SOL_IP) {
   4161 		if (optlen != sizeof(int) || sk->sk_family != AF_INET)
   4162 			return -EINVAL;
   4163 
   4164 		val = *((int *)optval);
   4165 		/* Only some options are supported */
   4166 		switch (optname) {
   4167 		case IP_TOS:
   4168 			if (val < -1 || val > 0xff) {
   4169 				ret = -EINVAL;
   4170 			} else {
   4171 				struct inet_sock *inet = inet_sk(sk);
   4172 
   4173 				if (val == -1)
   4174 					val = 0;
   4175 				inet->tos = val;
   4176 			}
   4177 			break;
   4178 		default:
   4179 			ret = -EINVAL;
   4180 		}
   4181 #if IS_ENABLED(CONFIG_IPV6)
   4182 	} else if (level == SOL_IPV6) {
   4183 		if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
   4184 			return -EINVAL;
   4185 
   4186 		val = *((int *)optval);
   4187 		/* Only some options are supported */
   4188 		switch (optname) {
   4189 		case IPV6_TCLASS:
   4190 			if (val < -1 || val > 0xff) {
   4191 				ret = -EINVAL;
   4192 			} else {
   4193 				struct ipv6_pinfo *np = inet6_sk(sk);
   4194 
   4195 				if (val == -1)
   4196 					val = 0;
   4197 				np->tclass = val;
   4198 			}
   4199 			break;
   4200 		default:
   4201 			ret = -EINVAL;
   4202 		}
   4203 #endif
   4204 	} else if (level == SOL_TCP &&
   4205 		   sk->sk_prot->setsockopt == tcp_setsockopt) {
   4206 		if (optname == TCP_CONGESTION) {
   4207 			char name[TCP_CA_NAME_MAX];
   4208 			bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
   4209 
   4210 			strncpy(name, optval, min_t(long, optlen,
   4211 						    TCP_CA_NAME_MAX-1));
   4212 			name[TCP_CA_NAME_MAX-1] = 0;
   4213 			ret = tcp_set_congestion_control(sk, name, false,
   4214 							 reinit);
   4215 		} else {
   4216 			struct tcp_sock *tp = tcp_sk(sk);
   4217 
   4218 			if (optlen != sizeof(int))
   4219 				return -EINVAL;
   4220 
   4221 			val = *((int *)optval);
   4222 			/* Only some options are supported */
   4223 			switch (optname) {
   4224 			case TCP_BPF_IW:
   4225 				if (val <= 0 || tp->data_segs_out > tp->syn_data)
   4226 					ret = -EINVAL;
   4227 				else
   4228 					tp->snd_cwnd = val;
   4229 				break;
   4230 			case TCP_BPF_SNDCWND_CLAMP:
   4231 				if (val <= 0) {
   4232 					ret = -EINVAL;
   4233 				} else {
   4234 					tp->snd_cwnd_clamp = val;
   4235 					tp->snd_ssthresh = val;
   4236 				}
   4237 				break;
   4238 			case TCP_SAVE_SYN:
   4239 				if (val < 0 || val > 1)
   4240 					ret = -EINVAL;
   4241 				else
   4242 					tp->save_syn = val;
   4243 				break;
   4244 			default:
   4245 				ret = -EINVAL;
   4246 			}
   4247 		}
   4248 #endif
   4249 	} else {
   4250 		ret = -EINVAL;
   4251 	}
   4252 	return ret;
   4253 }
   4254 
   4255 static const struct bpf_func_proto bpf_setsockopt_proto = {
   4256 	.func		= bpf_setsockopt,
   4257 	.gpl_only	= false,
   4258 	.ret_type	= RET_INTEGER,
   4259 	.arg1_type	= ARG_PTR_TO_CTX,
   4260 	.arg2_type	= ARG_ANYTHING,
   4261 	.arg3_type	= ARG_ANYTHING,
   4262 	.arg4_type	= ARG_PTR_TO_MEM,
   4263 	.arg5_type	= ARG_CONST_SIZE,
   4264 };
   4265 
   4266 BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
   4267 	   int, level, int, optname, char *, optval, int, optlen)
   4268 {
   4269 	struct sock *sk = bpf_sock->sk;
   4270 
   4271 	if (!sk_fullsock(sk))
   4272 		goto err_clear;
   4273 #ifdef CONFIG_INET
   4274 	if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
   4275 		struct inet_connection_sock *icsk;
   4276 		struct tcp_sock *tp;
   4277 
   4278 		switch (optname) {
   4279 		case TCP_CONGESTION:
   4280 			icsk = inet_csk(sk);
   4281 
   4282 			if (!icsk->icsk_ca_ops || optlen <= 1)
   4283 				goto err_clear;
   4284 			strncpy(optval, icsk->icsk_ca_ops->name, optlen);
   4285 			optval[optlen - 1] = 0;
   4286 			break;
   4287 		case TCP_SAVED_SYN:
   4288 			tp = tcp_sk(sk);
   4289 
   4290 			if (optlen <= 0 || !tp->saved_syn ||
   4291 			    optlen > tp->saved_syn[0])
   4292 				goto err_clear;
   4293 			memcpy(optval, tp->saved_syn + 1, optlen);
   4294 			break;
   4295 		default:
   4296 			goto err_clear;
   4297 		}
   4298 	} else if (level == SOL_IP) {
   4299 		struct inet_sock *inet = inet_sk(sk);
   4300 
   4301 		if (optlen != sizeof(int) || sk->sk_family != AF_INET)
   4302 			goto err_clear;
   4303 
   4304 		/* Only some options are supported */
   4305 		switch (optname) {
   4306 		case IP_TOS:
   4307 			*((int *)optval) = (int)inet->tos;
   4308 			break;
   4309 		default:
   4310 			goto err_clear;
   4311 		}
   4312 #if IS_ENABLED(CONFIG_IPV6)
   4313 	} else if (level == SOL_IPV6) {
   4314 		struct ipv6_pinfo *np = inet6_sk(sk);
   4315 
   4316 		if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
   4317 			goto err_clear;
   4318 
   4319 		/* Only some options are supported */
   4320 		switch (optname) {
   4321 		case IPV6_TCLASS:
   4322 			*((int *)optval) = (int)np->tclass;
   4323 			break;
   4324 		default:
   4325 			goto err_clear;
   4326 		}
   4327 #endif
   4328 	} else {
   4329 		goto err_clear;
   4330 	}
   4331 	return 0;
   4332 #endif
   4333 err_clear:
   4334 	memset(optval, 0, optlen);
   4335 	return -EINVAL;
   4336 }
   4337 
   4338 static const struct bpf_func_proto bpf_getsockopt_proto = {
   4339 	.func		= bpf_getsockopt,
   4340 	.gpl_only	= false,
   4341 	.ret_type	= RET_INTEGER,
   4342 	.arg1_type	= ARG_PTR_TO_CTX,
   4343 	.arg2_type	= ARG_ANYTHING,
   4344 	.arg3_type	= ARG_ANYTHING,
   4345 	.arg4_type	= ARG_PTR_TO_UNINIT_MEM,
   4346 	.arg5_type	= ARG_CONST_SIZE,
   4347 };
   4348 
   4349 BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
   4350 	   int, argval)
   4351 {
   4352 	struct sock *sk = bpf_sock->sk;
   4353 	int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
   4354 
   4355 	if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
   4356 		return -EINVAL;
   4357 
   4358 	if (val)
   4359 		tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
   4360 
   4361 	return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
   4362 }
   4363 
   4364 static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
   4365 	.func		= bpf_sock_ops_cb_flags_set,
   4366 	.gpl_only	= false,
   4367 	.ret_type	= RET_INTEGER,
   4368 	.arg1_type	= ARG_PTR_TO_CTX,
   4369 	.arg2_type	= ARG_ANYTHING,
   4370 };
   4371 
   4372 const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
   4373 EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
   4374 
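/* Kernel side of the bpf_bind() helper, used by
 * BPF_PROG_TYPE_CGROUP_SOCK_ADDR programs attached to the connect() hooks
 * to bind the socket to a chosen source address before the connection is
 * established.
 */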
   4375 BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
   4376 	   int, addr_len)
   4377 {
   4378 #ifdef CONFIG_INET
   4379 	struct sock *sk = ctx->sk;
   4380 	int err;
   4381 
    4382 	/* Binding to a port can be expensive, so it is prohibited in this
    4383 	 * helper; only binding to an IP address is supported.
    4384 	 */
   4385 	err = -EINVAL;
   4386 	if (addr_len < offsetofend(struct sockaddr, sa_family))
   4387 		return err;
   4388 	if (addr->sa_family == AF_INET) {
   4389 		if (addr_len < sizeof(struct sockaddr_in))
   4390 			return err;
   4391 		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
   4392 			return err;
   4393 		return __inet_bind(sk, addr, addr_len, true, false);
   4394 #if IS_ENABLED(CONFIG_IPV6)
   4395 	} else if (addr->sa_family == AF_INET6) {
   4396 		if (addr_len < SIN6_LEN_RFC2133)
   4397 			return err;
   4398 		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
   4399 			return err;
    4400 		/* ipv6_bpf_stub cannot be NULL here: this path is reached from
    4401 		 * the bpf_cgroup_inet6_connect hook, so ipv6 is already loaded.
    4402 		 */
   4403 		return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
   4404 #endif /* CONFIG_IPV6 */
   4405 	}
   4406 #endif /* CONFIG_INET */
   4407 
   4408 	return -EAFNOSUPPORT;
   4409 }
   4410 
   4411 static const struct bpf_func_proto bpf_bind_proto = {
   4412 	.func		= bpf_bind,
   4413 	.gpl_only	= false,
   4414 	.ret_type	= RET_INTEGER,
   4415 	.arg1_type	= ARG_PTR_TO_CTX,
   4416 	.arg2_type	= ARG_PTR_TO_MEM,
   4417 	.arg3_type	= ARG_CONST_SIZE,
   4418 };
   4419 
   4420 #ifdef CONFIG_XFRM
   4421 BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
   4422 	   struct bpf_xfrm_state *, to, u32, size, u64, flags)
   4423 {
   4424 	const struct sec_path *sp = skb_sec_path(skb);
   4425 	const struct xfrm_state *x;
   4426 
   4427 	if (!sp || unlikely(index >= sp->len || flags))
   4428 		goto err_clear;
   4429 
   4430 	x = sp->xvec[index];
   4431 
   4432 	if (unlikely(size != sizeof(struct bpf_xfrm_state)))
   4433 		goto err_clear;
   4434 
   4435 	to->reqid = x->props.reqid;
   4436 	to->spi = x->id.spi;
   4437 	to->family = x->props.family;
   4438 	to->ext = 0;
   4439 
   4440 	if (to->family == AF_INET6) {
   4441 		memcpy(to->remote_ipv6, x->props.saddr.a6,
   4442 		       sizeof(to->remote_ipv6));
   4443 	} else {
   4444 		to->remote_ipv4 = x->props.saddr.a4;
   4445 		memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
   4446 	}
   4447 
   4448 	return 0;
   4449 err_clear:
   4450 	memset(to, 0, size);
   4451 	return -EINVAL;
   4452 }
   4453 
   4454 static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
   4455 	.func		= bpf_skb_get_xfrm_state,
   4456 	.gpl_only	= false,
   4457 	.ret_type	= RET_INTEGER,
   4458 	.arg1_type	= ARG_PTR_TO_CTX,
   4459 	.arg2_type	= ARG_ANYTHING,
   4460 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
   4461 	.arg4_type	= ARG_CONST_SIZE,
   4462 	.arg5_type	= ARG_ANYTHING,
   4463 };
   4464 #endif
   4465 
   4466 #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
   4467 static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
   4468 				  const struct neighbour *neigh,
   4469 				  const struct net_device *dev)
   4470 {
   4471 	memcpy(params->dmac, neigh->ha, ETH_ALEN);
   4472 	memcpy(params->smac, dev->dev_addr, ETH_ALEN);
   4473 	params->h_vlan_TCI = 0;
   4474 	params->h_vlan_proto = 0;
   4475 	params->ifindex = dev->ifindex;
   4476 
   4477 	return 0;
   4478 }
   4479 #endif
   4480 
   4481 #if IS_ENABLED(CONFIG_INET)
   4482 static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
   4483 			       u32 flags, bool check_mtu)
   4484 {
   4485 	struct in_device *in_dev;
   4486 	struct neighbour *neigh;
   4487 	struct net_device *dev;
   4488 	struct fib_result res;
   4489 	struct fib_nh *nh;
   4490 	struct flowi4 fl4;
   4491 	int err;
   4492 	u32 mtu;
   4493 
   4494 	dev = dev_get_by_index_rcu(net, params->ifindex);
   4495 	if (unlikely(!dev))
   4496 		return -ENODEV;
   4497 
   4498 	/* verify forwarding is enabled on this interface */
   4499 	in_dev = __in_dev_get_rcu(dev);
   4500 	if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
   4501 		return BPF_FIB_LKUP_RET_FWD_DISABLED;
   4502 
   4503 	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
   4504 		fl4.flowi4_iif = 1;
   4505 		fl4.flowi4_oif = params->ifindex;
   4506 	} else {
   4507 		fl4.flowi4_iif = params->ifindex;
   4508 		fl4.flowi4_oif = 0;
   4509 	}
   4510 	fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
   4511 	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
   4512 	fl4.flowi4_flags = 0;
   4513 
   4514 	fl4.flowi4_proto = params->l4_protocol;
   4515 	fl4.daddr = params->ipv4_dst;
   4516 	fl4.saddr = params->ipv4_src;
   4517 	fl4.fl4_sport = params->sport;
   4518 	fl4.fl4_dport = params->dport;
   4519 
   4520 	if (flags & BPF_FIB_LOOKUP_DIRECT) {
   4521 		u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
   4522 		struct fib_table *tb;
   4523 
   4524 		tb = fib_get_table(net, tbid);
   4525 		if (unlikely(!tb))
   4526 			return BPF_FIB_LKUP_RET_NOT_FWDED;
   4527 
   4528 		err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
   4529 	} else {
   4530 		fl4.flowi4_mark = 0;
   4531 		fl4.flowi4_secid = 0;
   4532 		fl4.flowi4_tun_key.tun_id = 0;
   4533 		fl4.flowi4_uid = sock_net_uid(net, NULL);
   4534 
   4535 		err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
   4536 	}
   4537 
   4538 	if (err) {
    4539 		/* map fib lookup errors to BPF_FIB_LKUP_RET_ codes */
   4540 		if (err == -EINVAL)
   4541 			return BPF_FIB_LKUP_RET_BLACKHOLE;
   4542 		if (err == -EHOSTUNREACH)
   4543 			return BPF_FIB_LKUP_RET_UNREACHABLE;
   4544 		if (err == -EACCES)
   4545 			return BPF_FIB_LKUP_RET_PROHIBIT;
   4546 
   4547 		return BPF_FIB_LKUP_RET_NOT_FWDED;
   4548 	}
   4549 
   4550 	if (res.type != RTN_UNICAST)
   4551 		return BPF_FIB_LKUP_RET_NOT_FWDED;
   4552 
   4553 	if (res.fi->fib_nhs > 1)
   4554 		fib_select_path(net, &res, &fl4, NULL);
   4555 
   4556 	if (check_mtu) {
   4557 		mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
   4558 		if (params->tot_len > mtu)
   4559 			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
   4560 	}
   4561 
   4562 	nh = &res.fi->fib_nh[res.nh_sel];
   4563 
   4564 	/* do not handle lwt encaps right now */
   4565 	if (nh->nh_lwtstate)
   4566 		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
   4567 
   4568 	dev = nh->nh_dev;
   4569 	if (nh->nh_gw)
   4570 		params->ipv4_dst = nh->nh_gw;
   4571 
   4572 	params->rt_metric = res.fi->fib_priority;
   4573 
    4574 	/* xdp and cls_bpf programs run under RCU-bh, so
    4575 	 * rcu_read_lock_bh is not needed here.
    4576 	 */
   4577 	neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
   4578 	if (!neigh)
   4579 		return BPF_FIB_LKUP_RET_NO_NEIGH;
   4580 
   4581 	return bpf_fib_set_fwd_params(params, neigh, dev);
   4582 }
   4583 #endif
   4584 
   4585 #if IS_ENABLED(CONFIG_IPV6)
   4586 static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
   4587 			       u32 flags, bool check_mtu)
   4588 {
   4589 	struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
   4590 	struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
   4591 	struct neighbour *neigh;
   4592 	struct net_device *dev;
   4593 	struct inet6_dev *idev;
   4594 	struct fib6_info *f6i;
   4595 	struct flowi6 fl6;
   4596 	int strict = 0;
   4597 	int oif;
   4598 	u32 mtu;
   4599 
    4600 	/* link-local addresses are never forwarded */
   4601 	if (rt6_need_strict(dst) || rt6_need_strict(src))
   4602 		return BPF_FIB_LKUP_RET_NOT_FWDED;
   4603 
   4604 	dev = dev_get_by_index_rcu(net, params->ifindex);
   4605 	if (unlikely(!dev))
   4606 		return -ENODEV;
   4607 
   4608 	idev = __in6_dev_get_safely(dev);
   4609 	if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
   4610 		return BPF_FIB_LKUP_RET_FWD_DISABLED;
   4611 
   4612 	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
   4613 		fl6.flowi6_iif = 1;
   4614 		oif = fl6.flowi6_oif = params->ifindex;
   4615 	} else {
   4616 		oif = fl6.flowi6_iif = params->ifindex;
   4617 		fl6.flowi6_oif = 0;
   4618 		strict = RT6_LOOKUP_F_HAS_SADDR;
   4619 	}
   4620 	fl6.flowlabel = params->flowinfo;
   4621 	fl6.flowi6_scope = 0;
   4622 	fl6.flowi6_flags = 0;
   4623 	fl6.mp_hash = 0;
   4624 
   4625 	fl6.flowi6_proto = params->l4_protocol;
   4626 	fl6.daddr = *dst;
   4627 	fl6.saddr = *src;
   4628 	fl6.fl6_sport = params->sport;
   4629 	fl6.fl6_dport = params->dport;
   4630 
   4631 	if (flags & BPF_FIB_LOOKUP_DIRECT) {
   4632 		u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
   4633 		struct fib6_table *tb;
   4634 
   4635 		tb = ipv6_stub->fib6_get_table(net, tbid);
   4636 		if (unlikely(!tb))
   4637 			return BPF_FIB_LKUP_RET_NOT_FWDED;
   4638 
   4639 		f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
   4640 	} else {
   4641 		fl6.flowi6_mark = 0;
   4642 		fl6.flowi6_secid = 0;
   4643 		fl6.flowi6_tun_key.tun_id = 0;
   4644 		fl6.flowi6_uid = sock_net_uid(net, NULL);
   4645 
   4646 		f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
   4647 	}
   4648 
   4649 	if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
   4650 		return BPF_FIB_LKUP_RET_NOT_FWDED;
   4651 
   4652 	if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
   4653 		switch (f6i->fib6_type) {
   4654 		case RTN_BLACKHOLE:
   4655 			return BPF_FIB_LKUP_RET_BLACKHOLE;
   4656 		case RTN_UNREACHABLE:
   4657 			return BPF_FIB_LKUP_RET_UNREACHABLE;
   4658 		case RTN_PROHIBIT:
   4659 			return BPF_FIB_LKUP_RET_PROHIBIT;
   4660 		default:
   4661 			return BPF_FIB_LKUP_RET_NOT_FWDED;
   4662 		}
   4663 	}
   4664 
   4665 	if (f6i->fib6_type != RTN_UNICAST)
   4666 		return BPF_FIB_LKUP_RET_NOT_FWDED;
   4667 
   4668 	if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
   4669 		f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
   4670 						       fl6.flowi6_oif, NULL,
   4671 						       strict);
   4672 
   4673 	if (check_mtu) {
   4674 		mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
   4675 		if (params->tot_len > mtu)
   4676 			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
   4677 	}
   4678 
   4679 	if (f6i->fib6_nh.nh_lwtstate)
   4680 		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
   4681 
   4682 	if (f6i->fib6_flags & RTF_GATEWAY)
   4683 		*dst = f6i->fib6_nh.nh_gw;
   4684 
   4685 	dev = f6i->fib6_nh.nh_dev;
   4686 	params->rt_metric = f6i->fib6_metric;
   4687 
    4688 	/* xdp and cls_bpf programs run under RCU-bh, so rcu_read_lock_bh is
    4689 	 * not needed here. __ipv6_neigh_lookup_noref cannot be used here
    4690 	 * because nd_tbl has to be fetched via the ipv6 stub.
    4691 	 */
   4692 	neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
   4693 				      ndisc_hashfn, dst, dev);
   4694 	if (!neigh)
   4695 		return BPF_FIB_LKUP_RET_NO_NEIGH;
   4696 
   4697 	return bpf_fib_set_fwd_params(params, neigh, dev);
   4698 }
   4699 #endif
   4700 
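/* Kernel side of the bpf_fib_lookup() helper for XDP: performs a
 * forwarding (FIB + neighbour) lookup and, on success, writes the next-hop
 * MAC addresses and egress ifindex back into @params so the program can
 * rewrite the Ethernet header and redirect the frame itself.
 *
 * Illustrative XDP usage (a sketch only; eth is assumed to point at the
 * packet's Ethernet header):
 *
 *	struct bpf_fib_lookup fib = {};
 *	// ... fill family, ifindex, addresses, tot_len from the packet ...
 *	if (bpf_fib_lookup(ctx, &fib, sizeof(fib), 0) ==
 *	    BPF_FIB_LKUP_RET_SUCCESS) {
 *		__builtin_memcpy(eth->h_dest, fib.dmac, ETH_ALEN);
 *		__builtin_memcpy(eth->h_source, fib.smac, ETH_ALEN);
 *		return bpf_redirect(fib.ifindex, 0);
 *	}
 */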
   4701 BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
   4702 	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
   4703 {
   4704 	if (plen < sizeof(*params))
   4705 		return -EINVAL;
   4706 
   4707 	if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
   4708 		return -EINVAL;
   4709 
   4710 	switch (params->family) {
   4711 #if IS_ENABLED(CONFIG_INET)
   4712 	case AF_INET:
   4713 		return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
   4714 					   flags, true);
   4715 #endif
   4716 #if IS_ENABLED(CONFIG_IPV6)
   4717 	case AF_INET6:
   4718 		return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
   4719 					   flags, true);
   4720 #endif
   4721 	}
   4722 	return -EAFNOSUPPORT;
   4723 }
   4724 
   4725 static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
   4726 	.func		= bpf_xdp_fib_lookup,
   4727 	.gpl_only	= true,
   4728 	.ret_type	= RET_INTEGER,
   4729 	.arg1_type      = ARG_PTR_TO_CTX,
   4730 	.arg2_type      = ARG_PTR_TO_MEM,
   4731 	.arg3_type      = ARG_CONST_SIZE,
   4732 	.arg4_type	= ARG_ANYTHING,
   4733 };
   4734 
   4735 BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
   4736 	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
   4737 {
   4738 	struct net *net = dev_net(skb->dev);
   4739 	int rc = -EAFNOSUPPORT;
   4740 
   4741 	if (plen < sizeof(*params))
   4742 		return -EINVAL;
   4743 
   4744 	if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
   4745 		return -EINVAL;
   4746 
   4747 	switch (params->family) {
   4748 #if IS_ENABLED(CONFIG_INET)
   4749 	case AF_INET:
   4750 		rc = bpf_ipv4_fib_lookup(net, params, flags, false);
   4751 		break;
   4752 #endif
   4753 #if IS_ENABLED(CONFIG_IPV6)
   4754 	case AF_INET6:
   4755 		rc = bpf_ipv6_fib_lookup(net, params, flags, false);
   4756 		break;
   4757 #endif
   4758 	}
   4759 
   4760 	if (!rc) {
   4761 		struct net_device *dev;
   4762 
   4763 		dev = dev_get_by_index_rcu(net, params->ifindex);
   4764 		if (!is_skb_forwardable(dev, skb))
   4765 			rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
   4766 	}
   4767 
   4768 	return rc;
   4769 }
   4770 
   4771 static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
   4772 	.func		= bpf_skb_fib_lookup,
   4773 	.gpl_only	= true,
   4774 	.ret_type	= RET_INTEGER,
   4775 	.arg1_type      = ARG_PTR_TO_CTX,
   4776 	.arg2_type      = ARG_PTR_TO_MEM,
   4777 	.arg3_type      = ARG_CONST_SIZE,
   4778 	.arg4_type	= ARG_ANYTHING,
   4779 };
   4780 
   4781 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
   4782 static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
   4783 {
   4784 	int err;
   4785 	struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
   4786 
   4787 	if (!seg6_validate_srh(srh, len))
   4788 		return -EINVAL;
   4789 
   4790 	switch (type) {
   4791 	case BPF_LWT_ENCAP_SEG6_INLINE:
   4792 		if (skb->protocol != htons(ETH_P_IPV6))
   4793 			return -EBADMSG;
   4794 
   4795 		err = seg6_do_srh_inline(skb, srh);
   4796 		break;
   4797 	case BPF_LWT_ENCAP_SEG6:
   4798 		skb_reset_inner_headers(skb);
   4799 		skb->encapsulation = 1;
   4800 		err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
   4801 		break;
   4802 	default:
   4803 		return -EINVAL;
   4804 	}
   4805 
   4806 	bpf_compute_data_pointers(skb);
   4807 	if (err)
   4808 		return err;
   4809 
   4810 	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
   4811 	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
   4812 
   4813 	return seg6_lookup_nexthop(skb, NULL, 0);
   4814 }
   4815 #endif /* CONFIG_IPV6_SEG6_BPF */
   4816 
   4817 #if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
   4818 static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
   4819 			     bool ingress)
   4820 {
   4821 	return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
   4822 }
   4823 #endif
   4824 
   4825 BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
   4826 	   u32, len)
   4827 {
   4828 	switch (type) {
   4829 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
   4830 	case BPF_LWT_ENCAP_SEG6:
   4831 	case BPF_LWT_ENCAP_SEG6_INLINE:
   4832 		return bpf_push_seg6_encap(skb, type, hdr, len);
   4833 #endif
   4834 #if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
   4835 	case BPF_LWT_ENCAP_IP:
   4836 		return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
   4837 #endif
   4838 	default:
   4839 		return -EINVAL;
   4840 	}
   4841 }
   4842 
   4843 BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
   4844 	   void *, hdr, u32, len)
   4845 {
   4846 	switch (type) {
   4847 #if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
   4848 	case BPF_LWT_ENCAP_IP:
   4849 		return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
   4850 #endif
   4851 	default:
   4852 		return -EINVAL;
   4853 	}
   4854 }
   4855 
   4856 static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
   4857 	.func		= bpf_lwt_in_push_encap,
   4858 	.gpl_only	= false,
   4859 	.ret_type	= RET_INTEGER,
   4860 	.arg1_type	= ARG_PTR_TO_CTX,
   4861 	.arg2_type	= ARG_ANYTHING,
   4862 	.arg3_type	= ARG_PTR_TO_MEM,
   4863 	.arg4_type	= ARG_CONST_SIZE
   4864 };
   4865 
   4866 static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
   4867 	.func		= bpf_lwt_xmit_push_encap,
   4868 	.gpl_only	= false,
   4869 	.ret_type	= RET_INTEGER,
   4870 	.arg1_type	= ARG_PTR_TO_CTX,
   4871 	.arg2_type	= ARG_ANYTHING,
   4872 	.arg3_type	= ARG_PTR_TO_MEM,
   4873 	.arg4_type	= ARG_CONST_SIZE
   4874 };
   4875 
   4876 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
   4877 BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
   4878 	   const void *, from, u32, len)
   4879 {
   4880 	struct seg6_bpf_srh_state *srh_state =
   4881 		this_cpu_ptr(&seg6_bpf_srh_states);
   4882 	struct ipv6_sr_hdr *srh = srh_state->srh;
   4883 	void *srh_tlvs, *srh_end, *ptr;
   4884 	int srhoff = 0;
   4885 
   4886 	if (srh == NULL)
   4887 		return -EINVAL;
   4888 
   4889 	srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
   4890 	srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
   4891 
   4892 	ptr = skb->data + offset;
   4893 	if (ptr >= srh_tlvs && ptr + len <= srh_end)
   4894 		srh_state->valid = false;
   4895 	else if (ptr < (void *)&srh->flags ||
   4896 		 ptr + len > (void *)&srh->segments)
   4897 		return -EFAULT;
   4898 
   4899 	if (unlikely(bpf_try_make_writable(skb, offset + len)))
   4900 		return -EFAULT;
   4901 	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
   4902 		return -EINVAL;
   4903 	srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
   4904 
   4905 	memcpy(skb->data + offset, from, len);
   4906 	return 0;
   4907 }
   4908 
   4909 static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
   4910 	.func		= bpf_lwt_seg6_store_bytes,
   4911 	.gpl_only	= false,
   4912 	.ret_type	= RET_INTEGER,
   4913 	.arg1_type	= ARG_PTR_TO_CTX,
   4914 	.arg2_type	= ARG_ANYTHING,
   4915 	.arg3_type	= ARG_PTR_TO_MEM,
   4916 	.arg4_type	= ARG_CONST_SIZE
   4917 };
   4918 
   4919 static void bpf_update_srh_state(struct sk_buff *skb)
   4920 {
   4921 	struct seg6_bpf_srh_state *srh_state =
   4922 		this_cpu_ptr(&seg6_bpf_srh_states);
   4923 	int srhoff = 0;
   4924 
   4925 	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
   4926 		srh_state->srh = NULL;
   4927 	} else {
   4928 		srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
   4929 		srh_state->hdrlen = srh_state->srh->hdrlen << 3;
   4930 		srh_state->valid = true;
   4931 	}
   4932 }
   4933 
   4934 BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
   4935 	   u32, action, void *, param, u32, param_len)
   4936 {
   4937 	struct seg6_bpf_srh_state *srh_state =
   4938 		this_cpu_ptr(&seg6_bpf_srh_states);
   4939 	int hdroff = 0;
   4940 	int err;
   4941 
   4942 	switch (action) {
   4943 	case SEG6_LOCAL_ACTION_END_X:
   4944 		if (!seg6_bpf_has_valid_srh(skb))
   4945 			return -EBADMSG;
   4946 		if (param_len != sizeof(struct in6_addr))
   4947 			return -EINVAL;
   4948 		return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
   4949 	case SEG6_LOCAL_ACTION_END_T:
   4950 		if (!seg6_bpf_has_valid_srh(skb))
   4951 			return -EBADMSG;
   4952 		if (param_len != sizeof(int))
   4953 			return -EINVAL;
   4954 		return seg6_lookup_nexthop(skb, NULL, *(int *)param);
   4955 	case SEG6_LOCAL_ACTION_END_DT6:
   4956 		if (!seg6_bpf_has_valid_srh(skb))
   4957 			return -EBADMSG;
   4958 		if (param_len != sizeof(int))
   4959 			return -EINVAL;
   4960 
   4961 		if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
   4962 			return -EBADMSG;
   4963 		if (!pskb_pull(skb, hdroff))
   4964 			return -EBADMSG;
   4965 
   4966 		skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
   4967 		skb_reset_network_header(skb);
   4968 		skb_reset_transport_header(skb);
   4969 		skb->encapsulation = 0;
   4970 
   4971 		bpf_compute_data_pointers(skb);
   4972 		bpf_update_srh_state(skb);
   4973 		return seg6_lookup_nexthop(skb, NULL, *(int *)param);
   4974 	case SEG6_LOCAL_ACTION_END_B6:
   4975 		if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
   4976 			return -EBADMSG;
   4977 		err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
   4978 					  param, param_len);
   4979 		if (!err)
   4980 			bpf_update_srh_state(skb);
   4981 
   4982 		return err;
   4983 	case SEG6_LOCAL_ACTION_END_B6_ENCAP:
   4984 		if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
   4985 			return -EBADMSG;
   4986 		err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
   4987 					  param, param_len);
   4988 		if (!err)
   4989 			bpf_update_srh_state(skb);
   4990 
   4991 		return err;
   4992 	default:
   4993 		return -EINVAL;
   4994 	}
   4995 }
   4996 
   4997 static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
   4998 	.func		= bpf_lwt_seg6_action,
   4999 	.gpl_only	= false,
   5000 	.ret_type	= RET_INTEGER,
   5001 	.arg1_type	= ARG_PTR_TO_CTX,
   5002 	.arg2_type	= ARG_ANYTHING,
   5003 	.arg3_type	= ARG_PTR_TO_MEM,
   5004 	.arg4_type	= ARG_CONST_SIZE
   5005 };
   5006 
   5007 BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
   5008 	   s32, len)
   5009 {
   5010 	struct seg6_bpf_srh_state *srh_state =
   5011 		this_cpu_ptr(&seg6_bpf_srh_states);
   5012 	struct ipv6_sr_hdr *srh = srh_state->srh;
   5013 	void *srh_end, *srh_tlvs, *ptr;
   5014 	struct ipv6hdr *hdr;
   5015 	int srhoff = 0;
   5016 	int ret;
   5017 
   5018 	if (unlikely(srh == NULL))
   5019 		return -EINVAL;
   5020 
   5021 	srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
   5022 			((srh->first_segment + 1) << 4));
   5023 	srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
   5024 			srh_state->hdrlen);
   5025 	ptr = skb->data + offset;
   5026 
   5027 	if (unlikely(ptr < srh_tlvs || ptr > srh_end))
   5028 		return -EFAULT;
   5029 	if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
   5030 		return -EFAULT;
   5031 
   5032 	if (len > 0) {
   5033 		ret = skb_cow_head(skb, len);
   5034 		if (unlikely(ret < 0))
   5035 			return ret;
   5036 
   5037 		ret = bpf_skb_net_hdr_push(skb, offset, len);
   5038 	} else {
   5039 		ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
   5040 	}
   5041 
   5042 	bpf_compute_data_pointers(skb);
   5043 	if (unlikely(ret < 0))
   5044 		return ret;
   5045 
   5046 	hdr = (struct ipv6hdr *)skb->data;
   5047 	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
   5048 
   5049 	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
   5050 		return -EINVAL;
   5051 	srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
   5052 	srh_state->hdrlen += len;
   5053 	srh_state->valid = false;
   5054 	return 0;
   5055 }
   5056 
   5057 static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
   5058 	.func		= bpf_lwt_seg6_adjust_srh,
   5059 	.gpl_only	= false,
   5060 	.ret_type	= RET_INTEGER,
   5061 	.arg1_type	= ARG_PTR_TO_CTX,
   5062 	.arg2_type	= ARG_ANYTHING,
   5063 	.arg3_type	= ARG_ANYTHING,
   5064 };
   5065 #endif /* CONFIG_IPV6_SEG6_BPF */
   5066 
   5067 #define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT)		\
   5068 do {									\
   5069 	switch (si->off) {						\
   5070 	case offsetof(md_type, snd_cwnd):				\
   5071 		CONVERT(snd_cwnd); break;				\
   5072 	case offsetof(md_type, srtt_us):				\
   5073 		CONVERT(srtt_us); break;				\
   5074 	case offsetof(md_type, snd_ssthresh):				\
   5075 		CONVERT(snd_ssthresh); break;				\
   5076 	case offsetof(md_type, rcv_nxt):				\
   5077 		CONVERT(rcv_nxt); break;				\
   5078 	case offsetof(md_type, snd_nxt):				\
   5079 		CONVERT(snd_nxt); break;				\
   5080 	case offsetof(md_type, snd_una):				\
   5081 		CONVERT(snd_una); break;				\
   5082 	case offsetof(md_type, mss_cache):				\
   5083 		CONVERT(mss_cache); break;				\
   5084 	case offsetof(md_type, ecn_flags):				\
   5085 		CONVERT(ecn_flags); break;				\
   5086 	case offsetof(md_type, rate_delivered):				\
   5087 		CONVERT(rate_delivered); break;				\
   5088 	case offsetof(md_type, rate_interval_us):			\
   5089 		CONVERT(rate_interval_us); break;			\
   5090 	case offsetof(md_type, packets_out):				\
   5091 		CONVERT(packets_out); break;				\
   5092 	case offsetof(md_type, retrans_out):				\
   5093 		CONVERT(retrans_out); break;				\
   5094 	case offsetof(md_type, total_retrans):				\
   5095 		CONVERT(total_retrans); break;				\
   5096 	case offsetof(md_type, segs_in):				\
   5097 		CONVERT(segs_in); break;				\
   5098 	case offsetof(md_type, data_segs_in):				\
   5099 		CONVERT(data_segs_in); break;				\
   5100 	case offsetof(md_type, segs_out):				\
   5101 		CONVERT(segs_out); break;				\
   5102 	case offsetof(md_type, data_segs_out):				\
   5103 		CONVERT(data_segs_out); break;				\
   5104 	case offsetof(md_type, lost_out):				\
   5105 		CONVERT(lost_out); break;				\
   5106 	case offsetof(md_type, sacked_out):				\
   5107 		CONVERT(sacked_out); break;				\
   5108 	case offsetof(md_type, bytes_received):				\
   5109 		CONVERT(bytes_received); break;				\
   5110 	case offsetof(md_type, bytes_acked):				\
   5111 		CONVERT(bytes_acked); break;				\
   5112 	}								\
   5113 } while (0)
   5114 
   5115 #ifdef CONFIG_INET
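         /* Look up the TCP or UDP socket matching @tuple in @net. Sockets
          * returned without a reference must be SOCK_RCU_FREE; anything else
          * is dropped here with a one-time warning.
          */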
   5116 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
   5117 			      int dif, int sdif, u8 family, u8 proto)
   5118 {
   5119 	bool refcounted = false;
   5120 	struct sock *sk = NULL;
   5121 
   5122 	if (family == AF_INET) {
   5123 		__be32 src4 = tuple->ipv4.saddr;
   5124 		__be32 dst4 = tuple->ipv4.daddr;
   5125 
   5126 		if (proto == IPPROTO_TCP)
   5127 			sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
   5128 					   src4, tuple->ipv4.sport,
   5129 					   dst4, tuple->ipv4.dport,
   5130 					   dif, sdif, &refcounted);
   5131 		else
   5132 			sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
   5133 					       dst4, tuple->ipv4.dport,
   5134 					       dif, sdif, &udp_table, NULL);
   5135 #if IS_ENABLED(CONFIG_IPV6)
   5136 	} else {
   5137 		struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
   5138 		struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
   5139 
   5140 		if (proto == IPPROTO_TCP)
   5141 			sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
   5142 					    src6, tuple->ipv6.sport,
   5143 					    dst6, ntohs(tuple->ipv6.dport),
   5144 					    dif, sdif, &refcounted);
   5145 		else if (likely(ipv6_bpf_stub))
   5146 			sk = ipv6_bpf_stub->udp6_lib_lookup(net,
   5147 							    src6, tuple->ipv6.sport,
   5148 							    dst6, tuple->ipv6.dport,
   5149 							    dif, sdif,
   5150 							    &udp_table, NULL);
   5151 #endif
   5152 	}
   5153 
   5154 	if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
   5155 		WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
   5156 		sk = NULL;
   5157 	}
   5158 	return sk;
   5159 }
   5160 
   5161 /* bpf_sk_lookup performs the core lookup for different types of sockets,
   5162  * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
   5163  * Returns the socket as an 'unsigned long' to simplify the casting in the
   5164  * callers to satisfy BPF_CALL declarations.
   5165  */
   5166 static unsigned long
   5167 __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
   5168 		struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
   5169 		u64 flags)
   5170 {
   5171 	struct sock *sk = NULL;
   5172 	u8 family = AF_UNSPEC;
   5173 	struct net *net;
   5174 	int sdif;
   5175 
   5176 	family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
   5177 	if (unlikely(family == AF_UNSPEC || flags ||
   5178 		     !((s32)netns_id < 0 || netns_id <= S32_MAX)))
   5179 		goto out;
   5180 
   5181 	if (family == AF_INET)
   5182 		sdif = inet_sdif(skb);
   5183 	else
   5184 		sdif = inet6_sdif(skb);
   5185 
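         	/* A negative netns_id selects the caller's own netns; otherwise
         	 * the id is resolved relative to the caller via get_net_ns_by_id().
         	 */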
   5186 	if ((s32)netns_id < 0) {
   5187 		net = caller_net;
   5188 		sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
   5189 	} else {
   5190 		net = get_net_ns_by_id(caller_net, netns_id);
   5191 		if (unlikely(!net))
   5192 			goto out;
   5193 		sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
   5194 		put_net(net);
   5195 	}
   5196 
   5197 	if (sk)
   5198 		sk = sk_to_full_sk(sk);
   5199 out:
   5200 	return (unsigned long) sk;
   5201 }
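         /* Editor's sketch (not part of this file): how a tc classifier program
          * is expected to pair the sk_lookup and sk_release helpers wired up
          * below. The section name, addresses and TC_ACT_OK verdict are
          * illustrative assumptions only.
          *
          *	SEC("classifier")
          *	int check_sock(struct __sk_buff *skb)
          *	{
          *		struct bpf_sock_tuple tuple = {};
          *		struct bpf_sock *sk;
          *
          *		tuple.ipv4.daddr = bpf_htonl(0x7f000001);  // 127.0.0.1
          *		tuple.ipv4.dport = bpf_htons(80);
          *		sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
          *				       BPF_F_CURRENT_NETNS, 0);
          *		if (sk)
          *			bpf_sk_release(sk);  // required for non-NULL results
          *		return TC_ACT_OK;
          *	}
          */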
   5202 
   5203 static unsigned long
   5204 bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
   5205 	      u8 proto, u64 netns_id, u64 flags)
   5206 {
   5207 	struct net *caller_net;
   5208 	int ifindex;
   5209 
   5210 	if (skb->dev) {
   5211 		caller_net = dev_net(skb->dev);
   5212 		ifindex = skb->dev->ifindex;
   5213 	} else {
   5214 		caller_net = sock_net(skb->sk);
   5215 		ifindex = 0;
   5216 	}
   5217 
   5218 	return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex,
   5219 			      proto, netns_id, flags);
   5220 }
   5221 
   5222 BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
   5223 	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
   5224 {
   5225 	return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags);
   5226 }
   5227 
   5228 static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
   5229 	.func		= bpf_sk_lookup_tcp,
   5230 	.gpl_only	= false,
   5231 	.pkt_access	= true,
   5232 	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
   5233 	.arg1_type	= ARG_PTR_TO_CTX,
   5234 	.arg2_type	= ARG_PTR_TO_MEM,
   5235 	.arg3_type	= ARG_CONST_SIZE,
   5236 	.arg4_type	= ARG_ANYTHING,
   5237 	.arg5_type	= ARG_ANYTHING,
   5238 };
   5239 
   5240 BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
   5241 	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
   5242 {
   5243 	return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags);
   5244 }
   5245 
   5246 static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
   5247 	.func		= bpf_sk_lookup_udp,
   5248 	.gpl_only	= false,
   5249 	.pkt_access	= true,
   5250 	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
   5251 	.arg1_type	= ARG_PTR_TO_CTX,
   5252 	.arg2_type	= ARG_PTR_TO_MEM,
   5253 	.arg3_type	= ARG_CONST_SIZE,
   5254 	.arg4_type	= ARG_ANYTHING,
   5255 	.arg5_type	= ARG_ANYTHING,
   5256 };
   5257 
   5258 BPF_CALL_1(bpf_sk_release, struct sock *, sk)
   5259 {
   5260 	if (!sock_flag(sk, SOCK_RCU_FREE))
   5261 		sock_gen_put(sk);
   5262 	return 0;
   5263 }
   5264 
   5265 static const struct bpf_func_proto bpf_sk_release_proto = {
   5266 	.func		= bpf_sk_release,
   5267 	.gpl_only	= false,
   5268 	.ret_type	= RET_INTEGER,
   5269 	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
   5270 };
   5271 
   5272 BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
   5273 	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
   5274 {
   5275 	struct net *caller_net = dev_net(ctx->rxq->dev);
   5276 	int ifindex = ctx->rxq->dev->ifindex;
   5277 
   5278 	return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
   5279 			      IPPROTO_UDP, netns_id, flags);
   5280 }
   5281 
   5282 static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
   5283 	.func           = bpf_xdp_sk_lookup_udp,
   5284 	.gpl_only       = false,
   5285 	.pkt_access     = true,
   5286 	.ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
   5287 	.arg1_type      = ARG_PTR_TO_CTX,
   5288 	.arg2_type      = ARG_PTR_TO_MEM,
   5289 	.arg3_type      = ARG_CONST_SIZE,
   5290 	.arg4_type      = ARG_ANYTHING,
   5291 	.arg5_type      = ARG_ANYTHING,
   5292 };
   5293 
   5294 BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
   5295 	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
   5296 {
   5297 	struct net *caller_net = dev_net(ctx->rxq->dev);
   5298 	int ifindex = ctx->rxq->dev->ifindex;
   5299 
   5300 	return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
   5301 			      IPPROTO_TCP, netns_id, flags);
   5302 }
   5303 
   5304 static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
   5305 	.func           = bpf_xdp_sk_lookup_tcp,
   5306 	.gpl_only       = false,
   5307 	.pkt_access     = true,
   5308 	.ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
   5309 	.arg1_type      = ARG_PTR_TO_CTX,
   5310 	.arg2_type      = ARG_PTR_TO_MEM,
   5311 	.arg3_type      = ARG_CONST_SIZE,
   5312 	.arg4_type      = ARG_ANYTHING,
   5313 	.arg5_type      = ARG_ANYTHING,
   5314 };
   5315 
   5316 BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
   5317 	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
   5318 {
   5319 	return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
   5320 			       IPPROTO_TCP, netns_id, flags);
   5321 }
   5322 
   5323 static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
   5324 	.func		= bpf_sock_addr_sk_lookup_tcp,
   5325 	.gpl_only	= false,
   5326 	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
   5327 	.arg1_type	= ARG_PTR_TO_CTX,
   5328 	.arg2_type	= ARG_PTR_TO_MEM,
   5329 	.arg3_type	= ARG_CONST_SIZE,
   5330 	.arg4_type	= ARG_ANYTHING,
   5331 	.arg5_type	= ARG_ANYTHING,
   5332 };
   5333 
   5334 BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
   5335 	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
   5336 {
   5337 	return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
   5338 			       IPPROTO_UDP, netns_id, flags);
   5339 }
   5340 
   5341 static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
   5342 	.func		= bpf_sock_addr_sk_lookup_udp,
   5343 	.gpl_only	= false,
   5344 	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
   5345 	.arg1_type	= ARG_PTR_TO_CTX,
   5346 	.arg2_type	= ARG_PTR_TO_MEM,
   5347 	.arg3_type	= ARG_CONST_SIZE,
   5348 	.arg4_type	= ARG_ANYTHING,
   5349 	.arg5_type	= ARG_ANYTHING,
   5350 };
   5351 
   5352 bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
   5353 				  struct bpf_insn_access_aux *info)
   5354 {
   5355 	if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked))
   5356 		return false;
   5357 
   5358 	if (off % size != 0)
   5359 		return false;
   5360 
   5361 	switch (off) {
   5362 	case offsetof(struct bpf_tcp_sock, bytes_received):
   5363 	case offsetof(struct bpf_tcp_sock, bytes_acked):
   5364 		return size == sizeof(__u64);
   5365 	default:
   5366 		return size == sizeof(__u32);
   5367 	}
   5368 }
   5369 
   5370 u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
   5371 				    const struct bpf_insn *si,
   5372 				    struct bpf_insn *insn_buf,
   5373 				    struct bpf_prog *prog, u32 *target_size)
   5374 {
   5375 	struct bpf_insn *insn = insn_buf;
   5376 
   5377 #define BPF_TCP_SOCK_GET_COMMON(FIELD)					\
   5378 	do {								\
   5379 		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) >	\
   5380 			     FIELD_SIZEOF(struct bpf_tcp_sock, FIELD));	\
   5381 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
   5382 				      si->dst_reg, si->src_reg,		\
   5383 				      offsetof(struct tcp_sock, FIELD)); \
   5384 	} while (0)
   5385 
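         	/* For example, si->off == offsetof(struct bpf_tcp_sock, snd_cwnd)
         	 * reduces to a single field load:
         	 *
         	 *	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, snd_cwnd),
         	 *			      si->dst_reg, si->src_reg,
         	 *			      offsetof(struct tcp_sock, snd_cwnd));
         	 */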
   5386 	CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock,
   5387 				       BPF_TCP_SOCK_GET_COMMON);
   5388 
   5389 	if (insn > insn_buf)
   5390 		return insn - insn_buf;
   5391 
   5392 	switch (si->off) {
   5393 	case offsetof(struct bpf_tcp_sock, rtt_min):
   5394 		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
   5395 			     sizeof(struct minmax));
   5396 		BUILD_BUG_ON(sizeof(struct minmax) <
   5397 			     sizeof(struct minmax_sample));
   5398 
   5399 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   5400 				      offsetof(struct tcp_sock, rtt_min) +
   5401 				      offsetof(struct minmax_sample, v));
   5402 		break;
   5403 	}
   5404 
   5405 	return insn - insn_buf;
   5406 }
   5407 
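         /* Return @sk only if it is a full TCP socket (not a request or
          * timewait minisock); the verifier then treats the result as a
          * bpf_tcp_sock pointer or NULL.
          */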
   5408 BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
   5409 {
   5410 	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
   5411 		return (unsigned long)sk;
   5412 
   5413 	return (unsigned long)NULL;
   5414 }
   5415 
   5416 static const struct bpf_func_proto bpf_tcp_sock_proto = {
   5417 	.func		= bpf_tcp_sock,
   5418 	.gpl_only	= false,
   5419 	.ret_type	= RET_PTR_TO_TCP_SOCK_OR_NULL,
   5420 	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
   5421 };
   5422 
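         /* Convert @sk to the full (listener) socket and return it only when it
          * is in TCP_LISTEN state and flagged SOCK_RCU_FREE, so the program does
          * not need to take or release a reference.
          */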
   5423 BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
   5424 {
   5425 	sk = sk_to_full_sk(sk);
   5426 
   5427 	if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
   5428 		return (unsigned long)sk;
   5429 
   5430 	return (unsigned long)NULL;
   5431 }
   5432 
   5433 static const struct bpf_func_proto bpf_get_listener_sock_proto = {
   5434 	.func		= bpf_get_listener_sock,
   5435 	.gpl_only	= false,
   5436 	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
   5437 	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
   5438 };
   5439 
   5440 BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
   5441 {
   5442 	unsigned int iphdr_len;
   5443 
   5444 	if (skb->protocol == cpu_to_be16(ETH_P_IP))
   5445 		iphdr_len = sizeof(struct iphdr);
   5446 	else if (skb->protocol == cpu_to_be16(ETH_P_IPV6))
   5447 		iphdr_len = sizeof(struct ipv6hdr);
   5448 	else
   5449 		return 0;
   5450 
   5451 	if (skb_headlen(skb) < iphdr_len)
   5452 		return 0;
   5453 
   5454 	if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len))
   5455 		return 0;
   5456 
   5457 	return INET_ECN_set_ce(skb);
   5458 }
   5459 
   5460 static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
   5461 	.func           = bpf_skb_ecn_set_ce,
   5462 	.gpl_only       = false,
   5463 	.ret_type       = RET_INTEGER,
   5464 	.arg1_type      = ARG_PTR_TO_CTX,
   5465 };
   5466 #endif /* CONFIG_INET */
   5467 
   5468 bool bpf_helper_changes_pkt_data(void *func)
   5469 {
   5470 	if (func == bpf_skb_vlan_push ||
   5471 	    func == bpf_skb_vlan_pop ||
   5472 	    func == bpf_skb_store_bytes ||
   5473 	    func == bpf_skb_change_proto ||
   5474 	    func == bpf_skb_change_head ||
   5475 	    func == sk_skb_change_head ||
   5476 	    func == bpf_skb_change_tail ||
   5477 	    func == sk_skb_change_tail ||
   5478 	    func == bpf_skb_adjust_room ||
   5479 	    func == bpf_skb_pull_data ||
   5480 	    func == sk_skb_pull_data ||
   5481 	    func == bpf_clone_redirect ||
   5482 	    func == bpf_l3_csum_replace ||
   5483 	    func == bpf_l4_csum_replace ||
   5484 	    func == bpf_xdp_adjust_head ||
   5485 	    func == bpf_xdp_adjust_meta ||
   5486 	    func == bpf_msg_pull_data ||
   5487 	    func == bpf_msg_push_data ||
   5488 	    func == bpf_msg_pop_data ||
   5489 	    func == bpf_xdp_adjust_tail ||
   5490 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
   5491 	    func == bpf_lwt_seg6_store_bytes ||
   5492 	    func == bpf_lwt_seg6_adjust_srh ||
   5493 	    func == bpf_lwt_seg6_action ||
   5494 #endif
   5495 	    func == bpf_lwt_in_push_encap ||
   5496 	    func == bpf_lwt_xmit_push_encap)
   5497 		return true;
   5498 
   5499 	return false;
   5500 }
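         /* Editor's sketch (not from this file): every helper listed above makes
          * the verifier invalidate previously derived packet pointers, so a
          * program must re-load and re-check them afterwards; pointer names are
          * illustrative:
          *
          *	if (bpf_skb_pull_data(skb, 0))
          *		return TC_ACT_SHOT;
          *	data     = (void *)(long)skb->data;
          *	data_end = (void *)(long)skb->data_end;
          *	if (data + sizeof(struct ethhdr) > data_end)
          *		return TC_ACT_SHOT;
          */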
   5501 
   5502 static const struct bpf_func_proto *
   5503 bpf_base_func_proto(enum bpf_func_id func_id)
   5504 {
   5505 	switch (func_id) {
   5506 	case BPF_FUNC_map_lookup_elem:
   5507 		return &bpf_map_lookup_elem_proto;
   5508 	case BPF_FUNC_map_update_elem:
   5509 		return &bpf_map_update_elem_proto;
   5510 	case BPF_FUNC_map_delete_elem:
   5511 		return &bpf_map_delete_elem_proto;
   5512 	case BPF_FUNC_map_push_elem:
   5513 		return &bpf_map_push_elem_proto;
   5514 	case BPF_FUNC_map_pop_elem:
   5515 		return &bpf_map_pop_elem_proto;
   5516 	case BPF_FUNC_map_peek_elem:
   5517 		return &bpf_map_peek_elem_proto;
   5518 	case BPF_FUNC_get_prandom_u32:
   5519 		return &bpf_get_prandom_u32_proto;
   5520 	case BPF_FUNC_get_smp_processor_id:
   5521 		return &bpf_get_raw_smp_processor_id_proto;
   5522 	case BPF_FUNC_get_numa_node_id:
   5523 		return &bpf_get_numa_node_id_proto;
   5524 	case BPF_FUNC_tail_call:
   5525 		return &bpf_tail_call_proto;
   5526 	case BPF_FUNC_ktime_get_ns:
   5527 		return &bpf_ktime_get_ns_proto;
   5528 	default:
   5529 		break;
   5530 	}
   5531 
   5532 	if (!capable(CAP_SYS_ADMIN))
   5533 		return NULL;
   5534 
   5535 	switch (func_id) {
   5536 	case BPF_FUNC_spin_lock:
   5537 		return &bpf_spin_lock_proto;
   5538 	case BPF_FUNC_spin_unlock:
   5539 		return &bpf_spin_unlock_proto;
   5540 	case BPF_FUNC_trace_printk:
   5541 		return bpf_get_trace_printk_proto();
   5542 	default:
   5543 		return NULL;
   5544 	}
   5545 }
   5546 
   5547 static const struct bpf_func_proto *
   5548 sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5549 {
   5550 	switch (func_id) {
   5551 	/* inet and inet6 sockets are created in a process
   5552 	 * context so there is always a valid uid/gid
   5553 	 */
   5554 	case BPF_FUNC_get_current_uid_gid:
   5555 		return &bpf_get_current_uid_gid_proto;
   5556 	case BPF_FUNC_get_local_storage:
   5557 		return &bpf_get_local_storage_proto;
   5558 	default:
   5559 		return bpf_base_func_proto(func_id);
   5560 	}
   5561 }
   5562 
   5563 static const struct bpf_func_proto *
   5564 sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5565 {
   5566 	switch (func_id) {
   5567 	/* inet and inet6 sockets are created in a process
   5568 	 * context so there is always a valid uid/gid
   5569 	 */
   5570 	case BPF_FUNC_get_current_uid_gid:
   5571 		return &bpf_get_current_uid_gid_proto;
   5572 	case BPF_FUNC_bind:
   5573 		switch (prog->expected_attach_type) {
   5574 		case BPF_CGROUP_INET4_CONNECT:
   5575 		case BPF_CGROUP_INET6_CONNECT:
   5576 			return &bpf_bind_proto;
   5577 		default:
   5578 			return NULL;
   5579 		}
   5580 	case BPF_FUNC_get_socket_cookie:
   5581 		return &bpf_get_socket_cookie_sock_addr_proto;
   5582 	case BPF_FUNC_get_local_storage:
   5583 		return &bpf_get_local_storage_proto;
   5584 #ifdef CONFIG_INET
   5585 	case BPF_FUNC_sk_lookup_tcp:
   5586 		return &bpf_sock_addr_sk_lookup_tcp_proto;
   5587 	case BPF_FUNC_sk_lookup_udp:
   5588 		return &bpf_sock_addr_sk_lookup_udp_proto;
   5589 	case BPF_FUNC_sk_release:
   5590 		return &bpf_sk_release_proto;
   5591 #endif /* CONFIG_INET */
   5592 	default:
   5593 		return bpf_base_func_proto(func_id);
   5594 	}
   5595 }
   5596 
   5597 static const struct bpf_func_proto *
   5598 sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5599 {
   5600 	switch (func_id) {
   5601 	case BPF_FUNC_skb_load_bytes:
   5602 		return &bpf_skb_load_bytes_proto;
   5603 	case BPF_FUNC_skb_load_bytes_relative:
   5604 		return &bpf_skb_load_bytes_relative_proto;
   5605 	case BPF_FUNC_get_socket_cookie:
   5606 		return &bpf_get_socket_cookie_proto;
   5607 	case BPF_FUNC_get_socket_uid:
   5608 		return &bpf_get_socket_uid_proto;
   5609 	default:
   5610 		return bpf_base_func_proto(func_id);
   5611 	}
   5612 }
   5613 
   5614 static const struct bpf_func_proto *
   5615 cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5616 {
   5617 	switch (func_id) {
   5618 	case BPF_FUNC_get_local_storage:
   5619 		return &bpf_get_local_storage_proto;
   5620 	case BPF_FUNC_sk_fullsock:
   5621 		return &bpf_sk_fullsock_proto;
   5622 #ifdef CONFIG_INET
   5623 	case BPF_FUNC_tcp_sock:
   5624 		return &bpf_tcp_sock_proto;
   5625 	case BPF_FUNC_get_listener_sock:
   5626 		return &bpf_get_listener_sock_proto;
   5627 	case BPF_FUNC_skb_ecn_set_ce:
   5628 		return &bpf_skb_ecn_set_ce_proto;
   5629 #endif
   5630 	default:
   5631 		return sk_filter_func_proto(func_id, prog);
   5632 	}
   5633 }
   5634 
   5635 static const struct bpf_func_proto *
   5636 tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5637 {
   5638 	switch (func_id) {
   5639 	case BPF_FUNC_skb_store_bytes:
   5640 		return &bpf_skb_store_bytes_proto;
   5641 	case BPF_FUNC_skb_load_bytes:
   5642 		return &bpf_skb_load_bytes_proto;
   5643 	case BPF_FUNC_skb_load_bytes_relative:
   5644 		return &bpf_skb_load_bytes_relative_proto;
   5645 	case BPF_FUNC_skb_pull_data:
   5646 		return &bpf_skb_pull_data_proto;
   5647 	case BPF_FUNC_csum_diff:
   5648 		return &bpf_csum_diff_proto;
   5649 	case BPF_FUNC_csum_update:
   5650 		return &bpf_csum_update_proto;
   5651 	case BPF_FUNC_l3_csum_replace:
   5652 		return &bpf_l3_csum_replace_proto;
   5653 	case BPF_FUNC_l4_csum_replace:
   5654 		return &bpf_l4_csum_replace_proto;
   5655 	case BPF_FUNC_clone_redirect:
   5656 		return &bpf_clone_redirect_proto;
   5657 	case BPF_FUNC_get_cgroup_classid:
   5658 		return &bpf_get_cgroup_classid_proto;
   5659 	case BPF_FUNC_skb_vlan_push:
   5660 		return &bpf_skb_vlan_push_proto;
   5661 	case BPF_FUNC_skb_vlan_pop:
   5662 		return &bpf_skb_vlan_pop_proto;
   5663 	case BPF_FUNC_skb_change_proto:
   5664 		return &bpf_skb_change_proto_proto;
   5665 	case BPF_FUNC_skb_change_type:
   5666 		return &bpf_skb_change_type_proto;
   5667 	case BPF_FUNC_skb_adjust_room:
   5668 		return &bpf_skb_adjust_room_proto;
   5669 	case BPF_FUNC_skb_change_tail:
   5670 		return &bpf_skb_change_tail_proto;
   5671 	case BPF_FUNC_skb_get_tunnel_key:
   5672 		return &bpf_skb_get_tunnel_key_proto;
   5673 	case BPF_FUNC_skb_set_tunnel_key:
   5674 		return bpf_get_skb_set_tunnel_proto(func_id);
   5675 	case BPF_FUNC_skb_get_tunnel_opt:
   5676 		return &bpf_skb_get_tunnel_opt_proto;
   5677 	case BPF_FUNC_skb_set_tunnel_opt:
   5678 		return bpf_get_skb_set_tunnel_proto(func_id);
   5679 	case BPF_FUNC_redirect:
   5680 		return &bpf_redirect_proto;
   5681 	case BPF_FUNC_get_route_realm:
   5682 		return &bpf_get_route_realm_proto;
   5683 	case BPF_FUNC_get_hash_recalc:
   5684 		return &bpf_get_hash_recalc_proto;
   5685 	case BPF_FUNC_set_hash_invalid:
   5686 		return &bpf_set_hash_invalid_proto;
   5687 	case BPF_FUNC_set_hash:
   5688 		return &bpf_set_hash_proto;
   5689 	case BPF_FUNC_perf_event_output:
   5690 		return &bpf_skb_event_output_proto;
   5691 	case BPF_FUNC_get_smp_processor_id:
   5692 		return &bpf_get_smp_processor_id_proto;
   5693 	case BPF_FUNC_skb_under_cgroup:
   5694 		return &bpf_skb_under_cgroup_proto;
   5695 	case BPF_FUNC_get_socket_cookie:
   5696 		return &bpf_get_socket_cookie_proto;
   5697 	case BPF_FUNC_get_socket_uid:
   5698 		return &bpf_get_socket_uid_proto;
   5699 	case BPF_FUNC_fib_lookup:
   5700 		return &bpf_skb_fib_lookup_proto;
   5701 	case BPF_FUNC_sk_fullsock:
   5702 		return &bpf_sk_fullsock_proto;
   5703 #ifdef CONFIG_XFRM
   5704 	case BPF_FUNC_skb_get_xfrm_state:
   5705 		return &bpf_skb_get_xfrm_state_proto;
   5706 #endif
   5707 #ifdef CONFIG_SOCK_CGROUP_DATA
   5708 	case BPF_FUNC_skb_cgroup_id:
   5709 		return &bpf_skb_cgroup_id_proto;
   5710 	case BPF_FUNC_skb_ancestor_cgroup_id:
   5711 		return &bpf_skb_ancestor_cgroup_id_proto;
   5712 #endif
   5713 #ifdef CONFIG_INET
   5714 	case BPF_FUNC_sk_lookup_tcp:
   5715 		return &bpf_sk_lookup_tcp_proto;
   5716 	case BPF_FUNC_sk_lookup_udp:
   5717 		return &bpf_sk_lookup_udp_proto;
   5718 	case BPF_FUNC_sk_release:
   5719 		return &bpf_sk_release_proto;
   5720 	case BPF_FUNC_tcp_sock:
   5721 		return &bpf_tcp_sock_proto;
   5722 	case BPF_FUNC_get_listener_sock:
   5723 		return &bpf_get_listener_sock_proto;
   5724 #endif
   5725 	default:
   5726 		return bpf_base_func_proto(func_id);
   5727 	}
   5728 }
   5729 
   5730 static const struct bpf_func_proto *
   5731 xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5732 {
   5733 	switch (func_id) {
   5734 	case BPF_FUNC_perf_event_output:
   5735 		return &bpf_xdp_event_output_proto;
   5736 	case BPF_FUNC_get_smp_processor_id:
   5737 		return &bpf_get_smp_processor_id_proto;
   5738 	case BPF_FUNC_csum_diff:
   5739 		return &bpf_csum_diff_proto;
   5740 	case BPF_FUNC_xdp_adjust_head:
   5741 		return &bpf_xdp_adjust_head_proto;
   5742 	case BPF_FUNC_xdp_adjust_meta:
   5743 		return &bpf_xdp_adjust_meta_proto;
   5744 	case BPF_FUNC_redirect:
   5745 		return &bpf_xdp_redirect_proto;
   5746 	case BPF_FUNC_redirect_map:
   5747 		return &bpf_xdp_redirect_map_proto;
   5748 	case BPF_FUNC_xdp_adjust_tail:
   5749 		return &bpf_xdp_adjust_tail_proto;
   5750 	case BPF_FUNC_fib_lookup:
   5751 		return &bpf_xdp_fib_lookup_proto;
   5752 #ifdef CONFIG_INET
   5753 	case BPF_FUNC_sk_lookup_udp:
   5754 		return &bpf_xdp_sk_lookup_udp_proto;
   5755 	case BPF_FUNC_sk_lookup_tcp:
   5756 		return &bpf_xdp_sk_lookup_tcp_proto;
   5757 	case BPF_FUNC_sk_release:
   5758 		return &bpf_sk_release_proto;
   5759 #endif
   5760 	default:
   5761 		return bpf_base_func_proto(func_id);
   5762 	}
   5763 }
   5764 
   5765 const struct bpf_func_proto bpf_sock_map_update_proto __weak;
   5766 const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
   5767 
   5768 static const struct bpf_func_proto *
   5769 sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5770 {
   5771 	switch (func_id) {
   5772 	case BPF_FUNC_setsockopt:
   5773 		return &bpf_setsockopt_proto;
   5774 	case BPF_FUNC_getsockopt:
   5775 		return &bpf_getsockopt_proto;
   5776 	case BPF_FUNC_sock_ops_cb_flags_set:
   5777 		return &bpf_sock_ops_cb_flags_set_proto;
   5778 	case BPF_FUNC_sock_map_update:
   5779 		return &bpf_sock_map_update_proto;
   5780 	case BPF_FUNC_sock_hash_update:
   5781 		return &bpf_sock_hash_update_proto;
   5782 	case BPF_FUNC_get_socket_cookie:
   5783 		return &bpf_get_socket_cookie_sock_ops_proto;
   5784 	case BPF_FUNC_get_local_storage:
   5785 		return &bpf_get_local_storage_proto;
   5786 	case BPF_FUNC_perf_event_output:
   5787 		return &bpf_sockopt_event_output_proto;
   5788 	default:
   5789 		return bpf_base_func_proto(func_id);
   5790 	}
   5791 }
   5792 
   5793 const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
   5794 const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
   5795 
   5796 static const struct bpf_func_proto *
   5797 sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5798 {
   5799 	switch (func_id) {
   5800 	case BPF_FUNC_msg_redirect_map:
   5801 		return &bpf_msg_redirect_map_proto;
   5802 	case BPF_FUNC_msg_redirect_hash:
   5803 		return &bpf_msg_redirect_hash_proto;
   5804 	case BPF_FUNC_msg_apply_bytes:
   5805 		return &bpf_msg_apply_bytes_proto;
   5806 	case BPF_FUNC_msg_cork_bytes:
   5807 		return &bpf_msg_cork_bytes_proto;
   5808 	case BPF_FUNC_msg_pull_data:
   5809 		return &bpf_msg_pull_data_proto;
   5810 	case BPF_FUNC_msg_push_data:
   5811 		return &bpf_msg_push_data_proto;
   5812 	case BPF_FUNC_msg_pop_data:
   5813 		return &bpf_msg_pop_data_proto;
   5814 	default:
   5815 		return bpf_base_func_proto(func_id);
   5816 	}
   5817 }
   5818 
   5819 const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
   5820 const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;
   5821 
   5822 static const struct bpf_func_proto *
   5823 sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5824 {
   5825 	switch (func_id) {
   5826 	case BPF_FUNC_skb_store_bytes:
   5827 		return &bpf_skb_store_bytes_proto;
   5828 	case BPF_FUNC_skb_load_bytes:
   5829 		return &bpf_skb_load_bytes_proto;
   5830 	case BPF_FUNC_skb_pull_data:
   5831 		return &sk_skb_pull_data_proto;
   5832 	case BPF_FUNC_skb_change_tail:
   5833 		return &sk_skb_change_tail_proto;
   5834 	case BPF_FUNC_skb_change_head:
   5835 		return &sk_skb_change_head_proto;
   5836 	case BPF_FUNC_get_socket_cookie:
   5837 		return &bpf_get_socket_cookie_proto;
   5838 	case BPF_FUNC_get_socket_uid:
   5839 		return &bpf_get_socket_uid_proto;
   5840 	case BPF_FUNC_sk_redirect_map:
   5841 		return &bpf_sk_redirect_map_proto;
   5842 	case BPF_FUNC_sk_redirect_hash:
   5843 		return &bpf_sk_redirect_hash_proto;
   5844 #ifdef CONFIG_INET
   5845 	case BPF_FUNC_sk_lookup_tcp:
   5846 		return &bpf_sk_lookup_tcp_proto;
   5847 	case BPF_FUNC_sk_lookup_udp:
   5848 		return &bpf_sk_lookup_udp_proto;
   5849 	case BPF_FUNC_sk_release:
   5850 		return &bpf_sk_release_proto;
   5851 #endif
   5852 	default:
   5853 		return bpf_base_func_proto(func_id);
   5854 	}
   5855 }
   5856 
   5857 static const struct bpf_func_proto *
   5858 flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5859 {
   5860 	switch (func_id) {
   5861 	case BPF_FUNC_skb_load_bytes:
   5862 		return &bpf_skb_load_bytes_proto;
   5863 	default:
   5864 		return bpf_base_func_proto(func_id);
   5865 	}
   5866 }
   5867 
   5868 static const struct bpf_func_proto *
   5869 lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5870 {
   5871 	switch (func_id) {
   5872 	case BPF_FUNC_skb_load_bytes:
   5873 		return &bpf_skb_load_bytes_proto;
   5874 	case BPF_FUNC_skb_pull_data:
   5875 		return &bpf_skb_pull_data_proto;
   5876 	case BPF_FUNC_csum_diff:
   5877 		return &bpf_csum_diff_proto;
   5878 	case BPF_FUNC_get_cgroup_classid:
   5879 		return &bpf_get_cgroup_classid_proto;
   5880 	case BPF_FUNC_get_route_realm:
   5881 		return &bpf_get_route_realm_proto;
   5882 	case BPF_FUNC_get_hash_recalc:
   5883 		return &bpf_get_hash_recalc_proto;
   5884 	case BPF_FUNC_perf_event_output:
   5885 		return &bpf_skb_event_output_proto;
   5886 	case BPF_FUNC_get_smp_processor_id:
   5887 		return &bpf_get_smp_processor_id_proto;
   5888 	case BPF_FUNC_skb_under_cgroup:
   5889 		return &bpf_skb_under_cgroup_proto;
   5890 	default:
   5891 		return bpf_base_func_proto(func_id);
   5892 	}
   5893 }
   5894 
   5895 static const struct bpf_func_proto *
   5896 lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5897 {
   5898 	switch (func_id) {
   5899 	case BPF_FUNC_lwt_push_encap:
   5900 		return &bpf_lwt_in_push_encap_proto;
   5901 	default:
   5902 		return lwt_out_func_proto(func_id, prog);
   5903 	}
   5904 }
   5905 
   5906 static const struct bpf_func_proto *
   5907 lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5908 {
   5909 	switch (func_id) {
   5910 	case BPF_FUNC_skb_get_tunnel_key:
   5911 		return &bpf_skb_get_tunnel_key_proto;
   5912 	case BPF_FUNC_skb_set_tunnel_key:
   5913 		return bpf_get_skb_set_tunnel_proto(func_id);
   5914 	case BPF_FUNC_skb_get_tunnel_opt:
   5915 		return &bpf_skb_get_tunnel_opt_proto;
   5916 	case BPF_FUNC_skb_set_tunnel_opt:
   5917 		return bpf_get_skb_set_tunnel_proto(func_id);
   5918 	case BPF_FUNC_redirect:
   5919 		return &bpf_redirect_proto;
   5920 	case BPF_FUNC_clone_redirect:
   5921 		return &bpf_clone_redirect_proto;
   5922 	case BPF_FUNC_skb_change_tail:
   5923 		return &bpf_skb_change_tail_proto;
   5924 	case BPF_FUNC_skb_change_head:
   5925 		return &bpf_skb_change_head_proto;
   5926 	case BPF_FUNC_skb_store_bytes:
   5927 		return &bpf_skb_store_bytes_proto;
   5928 	case BPF_FUNC_csum_update:
   5929 		return &bpf_csum_update_proto;
   5930 	case BPF_FUNC_l3_csum_replace:
   5931 		return &bpf_l3_csum_replace_proto;
   5932 	case BPF_FUNC_l4_csum_replace:
   5933 		return &bpf_l4_csum_replace_proto;
   5934 	case BPF_FUNC_set_hash_invalid:
   5935 		return &bpf_set_hash_invalid_proto;
   5936 	case BPF_FUNC_lwt_push_encap:
   5937 		return &bpf_lwt_xmit_push_encap_proto;
   5938 	default:
   5939 		return lwt_out_func_proto(func_id, prog);
   5940 	}
   5941 }
   5942 
   5943 static const struct bpf_func_proto *
   5944 lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   5945 {
   5946 	switch (func_id) {
   5947 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
   5948 	case BPF_FUNC_lwt_seg6_store_bytes:
   5949 		return &bpf_lwt_seg6_store_bytes_proto;
   5950 	case BPF_FUNC_lwt_seg6_action:
   5951 		return &bpf_lwt_seg6_action_proto;
   5952 	case BPF_FUNC_lwt_seg6_adjust_srh:
   5953 		return &bpf_lwt_seg6_adjust_srh_proto;
   5954 #endif
   5955 	default:
   5956 		return lwt_out_func_proto(func_id, prog);
   5957 	}
   5958 }
   5959 
   5960 static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
   5961 				    const struct bpf_prog *prog,
   5962 				    struct bpf_insn_access_aux *info)
   5963 {
   5964 	const int size_default = sizeof(__u32);
   5965 
   5966 	if (off < 0 || off >= sizeof(struct __sk_buff))
   5967 		return false;
   5968 
   5969 	/* The verifier guarantees that size > 0. */
   5970 	if (off % size != 0)
   5971 		return false;
   5972 
   5973 	switch (off) {
   5974 	case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
   5975 		if (off + size > offsetofend(struct __sk_buff, cb[4]))
   5976 			return false;
   5977 		break;
   5978 	case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
   5979 	case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
   5980 	case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
   5981 	case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
   5982 	case bpf_ctx_range(struct __sk_buff, data):
   5983 	case bpf_ctx_range(struct __sk_buff, data_meta):
   5984 	case bpf_ctx_range(struct __sk_buff, data_end):
   5985 		if (size != size_default)
   5986 			return false;
   5987 		break;
   5988 	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
   5989 		if (size != sizeof(__u64))
   5990 			return false;
   5991 		break;
   5992 	case bpf_ctx_range(struct __sk_buff, tstamp):
   5993 		if (size != sizeof(__u64))
   5994 			return false;
   5995 		break;
   5996 	case offsetof(struct __sk_buff, sk):
   5997 		if (type == BPF_WRITE || size != sizeof(__u64))
   5998 			return false;
   5999 		info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
   6000 		break;
   6001 	default:
   6002 		/* Only narrow read access allowed for now. */
   6003 		if (type == BPF_WRITE) {
   6004 			if (size != size_default)
   6005 				return false;
   6006 		} else {
   6007 			bpf_ctx_record_field_size(info, size_default);
   6008 			if (!bpf_ctx_narrow_access_ok(off, size, size_default))
   6009 				return false;
   6010 		}
   6011 	}
   6012 
   6013 	return true;
   6014 }
   6015 
   6016 static bool sk_filter_is_valid_access(int off, int size,
   6017 				      enum bpf_access_type type,
   6018 				      const struct bpf_prog *prog,
   6019 				      struct bpf_insn_access_aux *info)
   6020 {
   6021 	switch (off) {
   6022 	case bpf_ctx_range(struct __sk_buff, tc_classid):
   6023 	case bpf_ctx_range(struct __sk_buff, data):
   6024 	case bpf_ctx_range(struct __sk_buff, data_meta):
   6025 	case bpf_ctx_range(struct __sk_buff, data_end):
   6026 	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
   6027 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
   6028 	case bpf_ctx_range(struct __sk_buff, tstamp):
   6029 	case bpf_ctx_range(struct __sk_buff, wire_len):
   6030 		return false;
   6031 	}
   6032 
   6033 	if (type == BPF_WRITE) {
   6034 		switch (off) {
   6035 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
   6036 			break;
   6037 		default:
   6038 			return false;
   6039 		}
   6040 	}
   6041 
   6042 	return bpf_skb_is_valid_access(off, size, type, prog, info);
   6043 }
   6044 
   6045 static bool cg_skb_is_valid_access(int off, int size,
   6046 				   enum bpf_access_type type,
   6047 				   const struct bpf_prog *prog,
   6048 				   struct bpf_insn_access_aux *info)
   6049 {
   6050 	switch (off) {
   6051 	case bpf_ctx_range(struct __sk_buff, tc_classid):
   6052 	case bpf_ctx_range(struct __sk_buff, data_meta):
   6053 	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
   6054 	case bpf_ctx_range(struct __sk_buff, wire_len):
   6055 		return false;
   6056 	case bpf_ctx_range(struct __sk_buff, data):
   6057 	case bpf_ctx_range(struct __sk_buff, data_end):
   6058 		if (!capable(CAP_SYS_ADMIN))
   6059 			return false;
   6060 		break;
   6061 	}
   6062 
   6063 	if (type == BPF_WRITE) {
   6064 		switch (off) {
   6065 		case bpf_ctx_range(struct __sk_buff, mark):
   6066 		case bpf_ctx_range(struct __sk_buff, priority):
   6067 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
   6068 			break;
   6069 		case bpf_ctx_range(struct __sk_buff, tstamp):
   6070 			if (!capable(CAP_SYS_ADMIN))
   6071 				return false;
   6072 			break;
   6073 		default:
   6074 			return false;
   6075 		}
   6076 	}
   6077 
   6078 	switch (off) {
   6079 	case bpf_ctx_range(struct __sk_buff, data):
   6080 		info->reg_type = PTR_TO_PACKET;
   6081 		break;
   6082 	case bpf_ctx_range(struct __sk_buff, data_end):
   6083 		info->reg_type = PTR_TO_PACKET_END;
   6084 		break;
   6085 	}
   6086 
   6087 	return bpf_skb_is_valid_access(off, size, type, prog, info);
   6088 }
   6089 
   6090 static bool lwt_is_valid_access(int off, int size,
   6091 				enum bpf_access_type type,
   6092 				const struct bpf_prog *prog,
   6093 				struct bpf_insn_access_aux *info)
   6094 {
   6095 	switch (off) {
   6096 	case bpf_ctx_range(struct __sk_buff, tc_classid):
   6097 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
   6098 	case bpf_ctx_range(struct __sk_buff, data_meta):
   6099 	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
   6100 	case bpf_ctx_range(struct __sk_buff, tstamp):
   6101 	case bpf_ctx_range(struct __sk_buff, wire_len):
   6102 		return false;
   6103 	}
   6104 
   6105 	if (type == BPF_WRITE) {
   6106 		switch (off) {
   6107 		case bpf_ctx_range(struct __sk_buff, mark):
   6108 		case bpf_ctx_range(struct __sk_buff, priority):
   6109 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
   6110 			break;
   6111 		default:
   6112 			return false;
   6113 		}
   6114 	}
   6115 
   6116 	switch (off) {
   6117 	case bpf_ctx_range(struct __sk_buff, data):
   6118 		info->reg_type = PTR_TO_PACKET;
   6119 		break;
   6120 	case bpf_ctx_range(struct __sk_buff, data_end):
   6121 		info->reg_type = PTR_TO_PACKET_END;
   6122 		break;
   6123 	}
   6124 
   6125 	return bpf_skb_is_valid_access(off, size, type, prog, info);
   6126 }
   6127 
   6128 /* Attach type specific accesses */
   6129 static bool __sock_filter_check_attach_type(int off,
   6130 					    enum bpf_access_type access_type,
   6131 					    enum bpf_attach_type attach_type)
   6132 {
   6133 	switch (off) {
   6134 	case offsetof(struct bpf_sock, bound_dev_if):
   6135 	case offsetof(struct bpf_sock, mark):
   6136 	case offsetof(struct bpf_sock, priority):
   6137 		switch (attach_type) {
   6138 		case BPF_CGROUP_INET_SOCK_CREATE:
   6139 			goto full_access;
   6140 		default:
   6141 			return false;
   6142 		}
   6143 	case bpf_ctx_range(struct bpf_sock, src_ip4):
   6144 		switch (attach_type) {
   6145 		case BPF_CGROUP_INET4_POST_BIND:
   6146 			goto read_only;
   6147 		default:
   6148 			return false;
   6149 		}
   6150 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
   6151 		switch (attach_type) {
   6152 		case BPF_CGROUP_INET6_POST_BIND:
   6153 			goto read_only;
   6154 		default:
   6155 			return false;
   6156 		}
   6157 	case bpf_ctx_range(struct bpf_sock, src_port):
   6158 		switch (attach_type) {
   6159 		case BPF_CGROUP_INET4_POST_BIND:
   6160 		case BPF_CGROUP_INET6_POST_BIND:
   6161 			goto read_only;
   6162 		default:
   6163 			return false;
   6164 		}
   6165 	}
   6166 read_only:
   6167 	return access_type == BPF_READ;
   6168 full_access:
   6169 	return true;
   6170 }
   6171 
   6172 bool bpf_sock_common_is_valid_access(int off, int size,
   6173 				     enum bpf_access_type type,
   6174 				     struct bpf_insn_access_aux *info)
   6175 {
   6176 	switch (off) {
   6177 	case bpf_ctx_range_till(struct bpf_sock, type, priority):
   6178 		return false;
   6179 	default:
   6180 		return bpf_sock_is_valid_access(off, size, type, info);
   6181 	}
   6182 }
   6183 
   6184 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
   6185 			      struct bpf_insn_access_aux *info)
   6186 {
   6187 	const int size_default = sizeof(__u32);
   6188 
   6189 	if (off < 0 || off >= sizeof(struct bpf_sock))
   6190 		return false;
   6191 	if (off % size != 0)
   6192 		return false;
   6193 
   6194 	switch (off) {
   6195 	case offsetof(struct bpf_sock, state):
   6196 	case offsetof(struct bpf_sock, family):
   6197 	case offsetof(struct bpf_sock, type):
   6198 	case offsetof(struct bpf_sock, protocol):
   6199 	case offsetof(struct bpf_sock, dst_port):
   6200 	case offsetof(struct bpf_sock, src_port):
   6201 	case bpf_ctx_range(struct bpf_sock, src_ip4):
   6202 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
   6203 	case bpf_ctx_range(struct bpf_sock, dst_ip4):
   6204 	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
   6205 		bpf_ctx_record_field_size(info, size_default);
   6206 		return bpf_ctx_narrow_access_ok(off, size, size_default);
   6207 	}
   6208 
   6209 	return size == size_default;
   6210 }
   6211 
   6212 static bool sock_filter_is_valid_access(int off, int size,
   6213 					enum bpf_access_type type,
   6214 					const struct bpf_prog *prog,
   6215 					struct bpf_insn_access_aux *info)
   6216 {
   6217 	if (!bpf_sock_is_valid_access(off, size, type, info))
   6218 		return false;
   6219 	return __sock_filter_check_attach_type(off, type,
   6220 					       prog->expected_attach_type);
   6221 }
   6222 
   6223 static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write,
   6224 			     const struct bpf_prog *prog)
   6225 {
   6226 	/* Neither direct read nor direct write requires any preliminary
   6227 	 * action.
   6228 	 */
   6229 	return 0;
   6230 }
   6231 
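         /* For programs that write directly into the packet, prepend instructions
          * that unclone the skb via bpf_skb_pull_data(skb, 0) and bail out with
          * @drop_verdict if that fails, before the first original instruction runs.
          */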
   6232 static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
   6233 				const struct bpf_prog *prog, int drop_verdict)
   6234 {
   6235 	struct bpf_insn *insn = insn_buf;
   6236 
   6237 	if (!direct_write)
   6238 		return 0;
   6239 
   6240 	/* if (!skb->cloned)
   6241 	 *       goto start;
   6242 	 *
    6243 	 * (Fast path; otherwise the cloned bit is only an approximation
    6244 	 *  that we might be a clone, so do the rest in the helper.)
    6245 	 */
   6246 	*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
   6247 	*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
   6248 	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);
   6249 
   6250 	/* ret = bpf_skb_pull_data(skb, 0); */
   6251 	*insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
   6252 	*insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
   6253 	*insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
   6254 			       BPF_FUNC_skb_pull_data);
   6255 	/* if (!ret)
   6256 	 *      goto restore;
   6257 	 * return TC_ACT_SHOT;
   6258 	 */
   6259 	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
   6260 	*insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
   6261 	*insn++ = BPF_EXIT_INSN();
   6262 
   6263 	/* restore: */
   6264 	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
   6265 	/* start: */
   6266 	*insn++ = prog->insnsi[0];
   6267 
   6268 	return insn - insn_buf;
   6269 }
   6270 
   6271 static int bpf_gen_ld_abs(const struct bpf_insn *orig,
   6272 			  struct bpf_insn *insn_buf)
   6273 {
   6274 	bool indirect = BPF_MODE(orig->code) == BPF_IND;
   6275 	struct bpf_insn *insn = insn_buf;
   6276 
   6277 	/* We're guaranteed here that CTX is in R6. */
   6278 	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
   6279 	if (!indirect) {
   6280 		*insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
   6281 	} else {
   6282 		*insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg);
   6283 		if (orig->imm)
   6284 			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
   6285 	}
   6286 
   6287 	switch (BPF_SIZE(orig->code)) {
   6288 	case BPF_B:
   6289 		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache);
   6290 		break;
   6291 	case BPF_H:
   6292 		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache);
   6293 		break;
   6294 	case BPF_W:
   6295 		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache);
   6296 		break;
   6297 	}
   6298 
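         	/* A negative return from the load helper means the access failed;
         	 * mirror classic BPF LD_ABS behaviour by making the program return 0
         	 * in that case.
         	 */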
   6299 	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2);
   6300 	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
   6301 	*insn++ = BPF_EXIT_INSN();
   6302 
   6303 	return insn - insn_buf;
   6304 }
   6305 
   6306 static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
   6307 			       const struct bpf_prog *prog)
   6308 {
   6309 	return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
   6310 }
   6311 
   6312 static bool tc_cls_act_is_valid_access(int off, int size,
   6313 				       enum bpf_access_type type,
   6314 				       const struct bpf_prog *prog,
   6315 				       struct bpf_insn_access_aux *info)
   6316 {
   6317 	if (type == BPF_WRITE) {
   6318 		switch (off) {
   6319 		case bpf_ctx_range(struct __sk_buff, mark):
   6320 		case bpf_ctx_range(struct __sk_buff, tc_index):
   6321 		case bpf_ctx_range(struct __sk_buff, priority):
   6322 		case bpf_ctx_range(struct __sk_buff, tc_classid):
   6323 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
   6324 		case bpf_ctx_range(struct __sk_buff, tstamp):
   6325 		case bpf_ctx_range(struct __sk_buff, queue_mapping):
   6326 			break;
   6327 		default:
   6328 			return false;
   6329 		}
   6330 	}
   6331 
   6332 	switch (off) {
   6333 	case bpf_ctx_range(struct __sk_buff, data):
   6334 		info->reg_type = PTR_TO_PACKET;
   6335 		break;
   6336 	case bpf_ctx_range(struct __sk_buff, data_meta):
   6337 		info->reg_type = PTR_TO_PACKET_META;
   6338 		break;
   6339 	case bpf_ctx_range(struct __sk_buff, data_end):
   6340 		info->reg_type = PTR_TO_PACKET_END;
   6341 		break;
   6342 	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
   6343 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
   6344 		return false;
   6345 	}
   6346 
   6347 	return bpf_skb_is_valid_access(off, size, type, prog, info);
   6348 }
   6349 
   6350 static bool __is_valid_xdp_access(int off, int size)
   6351 {
   6352 	if (off < 0 || off >= sizeof(struct xdp_md))
   6353 		return false;
   6354 	if (off % size != 0)
   6355 		return false;
   6356 	if (size != sizeof(__u32))
   6357 		return false;
   6358 
   6359 	return true;
   6360 }
   6361 
   6362 static bool xdp_is_valid_access(int off, int size,
   6363 				enum bpf_access_type type,
   6364 				const struct bpf_prog *prog,
   6365 				struct bpf_insn_access_aux *info)
   6366 {
   6367 	if (type == BPF_WRITE) {
   6368 		if (bpf_prog_is_dev_bound(prog->aux)) {
   6369 			switch (off) {
   6370 			case offsetof(struct xdp_md, rx_queue_index):
   6371 				return __is_valid_xdp_access(off, size);
   6372 			}
   6373 		}
   6374 		return false;
   6375 	}
   6376 
   6377 	switch (off) {
   6378 	case offsetof(struct xdp_md, data):
   6379 		info->reg_type = PTR_TO_PACKET;
   6380 		break;
   6381 	case offsetof(struct xdp_md, data_meta):
   6382 		info->reg_type = PTR_TO_PACKET_META;
   6383 		break;
   6384 	case offsetof(struct xdp_md, data_end):
   6385 		info->reg_type = PTR_TO_PACKET_END;
   6386 		break;
   6387 	}
   6388 
   6389 	return __is_valid_xdp_access(off, size);
   6390 }
   6391 
   6392 void bpf_warn_invalid_xdp_action(u32 act)
   6393 {
   6394 	const u32 act_max = XDP_REDIRECT;
   6395 
   6396 	WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
   6397 		  act > act_max ? "Illegal" : "Driver unsupported",
   6398 		  act);
   6399 }
   6400 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
   6401 
   6402 static bool sock_addr_is_valid_access(int off, int size,
   6403 				      enum bpf_access_type type,
   6404 				      const struct bpf_prog *prog,
   6405 				      struct bpf_insn_access_aux *info)
   6406 {
   6407 	const int size_default = sizeof(__u32);
   6408 
   6409 	if (off < 0 || off >= sizeof(struct bpf_sock_addr))
   6410 		return false;
   6411 	if (off % size != 0)
   6412 		return false;
   6413 
    6414 	/* Disallow access to IPv6 fields from IPv4 context and vice
    6415 	 * versa.
    6416 	 */
   6417 	switch (off) {
   6418 	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
   6419 		switch (prog->expected_attach_type) {
   6420 		case BPF_CGROUP_INET4_BIND:
   6421 		case BPF_CGROUP_INET4_CONNECT:
   6422 		case BPF_CGROUP_UDP4_SENDMSG:
   6423 			break;
   6424 		default:
   6425 			return false;
   6426 		}
   6427 		break;
   6428 	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
   6429 		switch (prog->expected_attach_type) {
   6430 		case BPF_CGROUP_INET6_BIND:
   6431 		case BPF_CGROUP_INET6_CONNECT:
   6432 		case BPF_CGROUP_UDP6_SENDMSG:
   6433 			break;
   6434 		default:
   6435 			return false;
   6436 		}
   6437 		break;
   6438 	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
   6439 		switch (prog->expected_attach_type) {
   6440 		case BPF_CGROUP_UDP4_SENDMSG:
   6441 			break;
   6442 		default:
   6443 			return false;
   6444 		}
   6445 		break;
   6446 	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
   6447 				msg_src_ip6[3]):
   6448 		switch (prog->expected_attach_type) {
   6449 		case BPF_CGROUP_UDP6_SENDMSG:
   6450 			break;
   6451 		default:
   6452 			return false;
   6453 		}
   6454 		break;
   6455 	}
   6456 
   6457 	switch (off) {
   6458 	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
   6459 	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
   6460 	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
   6461 	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
   6462 				msg_src_ip6[3]):
   6463 		/* Only narrow read access allowed for now. */
   6464 		if (type == BPF_READ) {
   6465 			bpf_ctx_record_field_size(info, size_default);
   6466 			if (!bpf_ctx_narrow_access_ok(off, size, size_default))
   6467 				return false;
   6468 		} else {
   6469 			if (size != size_default)
   6470 				return false;
   6471 		}
   6472 		break;
   6473 	case bpf_ctx_range(struct bpf_sock_addr, user_port):
   6474 		if (size != size_default)
   6475 			return false;
   6476 		break;
   6477 	default:
   6478 		if (type == BPF_READ) {
   6479 			if (size != size_default)
   6480 				return false;
   6481 		} else {
   6482 			return false;
   6483 		}
   6484 	}
   6485 
   6486 	return true;
   6487 }
   6488 
   6489 static bool sock_ops_is_valid_access(int off, int size,
   6490 				     enum bpf_access_type type,
   6491 				     const struct bpf_prog *prog,
   6492 				     struct bpf_insn_access_aux *info)
   6493 {
   6494 	const int size_default = sizeof(__u32);
   6495 
   6496 	if (off < 0 || off >= sizeof(struct bpf_sock_ops))
   6497 		return false;
   6498 
   6499 	/* The verifier guarantees that size > 0. */
   6500 	if (off % size != 0)
   6501 		return false;
   6502 
   6503 	if (type == BPF_WRITE) {
   6504 		switch (off) {
   6505 		case offsetof(struct bpf_sock_ops, reply):
   6506 		case offsetof(struct bpf_sock_ops, sk_txhash):
   6507 			if (size != size_default)
   6508 				return false;
   6509 			break;
   6510 		default:
   6511 			return false;
   6512 		}
   6513 	} else {
   6514 		switch (off) {
   6515 		case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
   6516 					bytes_acked):
   6517 			if (size != sizeof(__u64))
   6518 				return false;
   6519 			break;
   6520 		default:
   6521 			if (size != size_default)
   6522 				return false;
   6523 			break;
   6524 		}
   6525 	}
   6526 
   6527 	return true;
   6528 }
   6529 
   6530 static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
   6531 			   const struct bpf_prog *prog)
   6532 {
   6533 	return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
   6534 }
   6535 
   6536 static bool sk_skb_is_valid_access(int off, int size,
   6537 				   enum bpf_access_type type,
   6538 				   const struct bpf_prog *prog,
   6539 				   struct bpf_insn_access_aux *info)
   6540 {
   6541 	switch (off) {
   6542 	case bpf_ctx_range(struct __sk_buff, tc_classid):
   6543 	case bpf_ctx_range(struct __sk_buff, data_meta):
   6544 	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
   6545 	case bpf_ctx_range(struct __sk_buff, tstamp):
   6546 	case bpf_ctx_range(struct __sk_buff, wire_len):
   6547 		return false;
   6548 	}
   6549 
   6550 	if (type == BPF_WRITE) {
   6551 		switch (off) {
   6552 		case bpf_ctx_range(struct __sk_buff, tc_index):
   6553 		case bpf_ctx_range(struct __sk_buff, priority):
   6554 			break;
   6555 		default:
   6556 			return false;
   6557 		}
   6558 	}
   6559 
   6560 	switch (off) {
   6561 	case bpf_ctx_range(struct __sk_buff, mark):
   6562 		return false;
   6563 	case bpf_ctx_range(struct __sk_buff, data):
   6564 		info->reg_type = PTR_TO_PACKET;
   6565 		break;
   6566 	case bpf_ctx_range(struct __sk_buff, data_end):
   6567 		info->reg_type = PTR_TO_PACKET_END;
   6568 		break;
   6569 	}
   6570 
   6571 	return bpf_skb_is_valid_access(off, size, type, prog, info);
   6572 }
   6573 
   6574 static bool sk_msg_is_valid_access(int off, int size,
   6575 				   enum bpf_access_type type,
   6576 				   const struct bpf_prog *prog,
   6577 				   struct bpf_insn_access_aux *info)
   6578 {
   6579 	if (type == BPF_WRITE)
   6580 		return false;
   6581 
   6582 	if (off % size != 0)
   6583 		return false;
   6584 
   6585 	switch (off) {
   6586 	case offsetof(struct sk_msg_md, data):
   6587 		info->reg_type = PTR_TO_PACKET;
   6588 		if (size != sizeof(__u64))
   6589 			return false;
   6590 		break;
   6591 	case offsetof(struct sk_msg_md, data_end):
   6592 		info->reg_type = PTR_TO_PACKET_END;
   6593 		if (size != sizeof(__u64))
   6594 			return false;
   6595 		break;
   6596 	case bpf_ctx_range(struct sk_msg_md, family):
   6597 	case bpf_ctx_range(struct sk_msg_md, remote_ip4):
   6598 	case bpf_ctx_range(struct sk_msg_md, local_ip4):
   6599 	case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]):
   6600 	case bpf_ctx_range_till(struct sk_msg_md, local_ip6[0], local_ip6[3]):
   6601 	case bpf_ctx_range(struct sk_msg_md, remote_port):
   6602 	case bpf_ctx_range(struct sk_msg_md, local_port):
   6603 	case bpf_ctx_range(struct sk_msg_md, size):
   6604 		if (size != sizeof(__u32))
   6605 			return false;
   6606 		break;
   6607 	default:
   6608 		return false;
   6609 	}
   6610 	return true;
   6611 }
   6612 
   6613 static bool flow_dissector_is_valid_access(int off, int size,
   6614 					   enum bpf_access_type type,
   6615 					   const struct bpf_prog *prog,
   6616 					   struct bpf_insn_access_aux *info)
   6617 {
   6618 	if (type == BPF_WRITE)
   6619 		return false;
   6620 
   6621 	switch (off) {
   6622 	case bpf_ctx_range(struct __sk_buff, data):
   6623 		info->reg_type = PTR_TO_PACKET;
   6624 		break;
   6625 	case bpf_ctx_range(struct __sk_buff, data_end):
   6626 		info->reg_type = PTR_TO_PACKET_END;
   6627 		break;
   6628 	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
   6629 		info->reg_type = PTR_TO_FLOW_KEYS;
   6630 		break;
   6631 	default:
   6632 		return false;
   6633 	}
   6634 
   6635 	return bpf_skb_is_valid_access(off, size, type, prog, info);
   6636 }
   6637 
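         /* Rewrite __sk_buff field accesses into loads/stores on the underlying
          * sk_buff and, where needed, on objects it points to such as the
          * net_device or qdisc_skb_cb.
          */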
   6638 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
   6639 				  const struct bpf_insn *si,
   6640 				  struct bpf_insn *insn_buf,
   6641 				  struct bpf_prog *prog, u32 *target_size)
   6642 {
   6643 	struct bpf_insn *insn = insn_buf;
   6644 	int off;
   6645 
   6646 	switch (si->off) {
   6647 	case offsetof(struct __sk_buff, len):
   6648 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   6649 				      bpf_target_off(struct sk_buff, len, 4,
   6650 						     target_size));
   6651 		break;
   6652 
   6653 	case offsetof(struct __sk_buff, protocol):
   6654 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
   6655 				      bpf_target_off(struct sk_buff, protocol, 2,
   6656 						     target_size));
   6657 		break;
   6658 
   6659 	case offsetof(struct __sk_buff, vlan_proto):
   6660 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
   6661 				      bpf_target_off(struct sk_buff, vlan_proto, 2,
   6662 						     target_size));
   6663 		break;
   6664 
   6665 	case offsetof(struct __sk_buff, priority):
   6666 		if (type == BPF_WRITE)
   6667 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
   6668 					      bpf_target_off(struct sk_buff, priority, 4,
   6669 							     target_size));
   6670 		else
   6671 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   6672 					      bpf_target_off(struct sk_buff, priority, 4,
   6673 							     target_size));
   6674 		break;
   6675 
   6676 	case offsetof(struct __sk_buff, ingress_ifindex):
   6677 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   6678 				      bpf_target_off(struct sk_buff, skb_iif, 4,
   6679 						     target_size));
   6680 		break;
   6681 
   6682 	case offsetof(struct __sk_buff, ifindex):
   6683 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
   6684 				      si->dst_reg, si->src_reg,
   6685 				      offsetof(struct sk_buff, dev));
   6686 		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
   6687 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   6688 				      bpf_target_off(struct net_device, ifindex, 4,
   6689 						     target_size));
   6690 		break;
   6691 
   6692 	case offsetof(struct __sk_buff, hash):
   6693 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   6694 				      bpf_target_off(struct sk_buff, hash, 4,
   6695 						     target_size));
   6696 		break;
   6697 
   6698 	case offsetof(struct __sk_buff, mark):
   6699 		if (type == BPF_WRITE)
   6700 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
   6701 					      bpf_target_off(struct sk_buff, mark, 4,
   6702 							     target_size));
   6703 		else
   6704 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   6705 					      bpf_target_off(struct sk_buff, mark, 4,
   6706 							     target_size));
   6707 		break;
   6708 
   6709 	case offsetof(struct __sk_buff, pkt_type):
   6710 		*target_size = 1;
   6711 		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
   6712 				      PKT_TYPE_OFFSET());
   6713 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
   6714 #ifdef __BIG_ENDIAN_BITFIELD
   6715 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
   6716 #endif
   6717 		break;
   6718 
   6719 	case offsetof(struct __sk_buff, queue_mapping):
   6720 		if (type == BPF_WRITE) {
   6721 			*insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
   6722 			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
   6723 					      bpf_target_off(struct sk_buff,
   6724 							     queue_mapping,
   6725 							     2, target_size));
   6726 		} else {
   6727 			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
   6728 					      bpf_target_off(struct sk_buff,
   6729 							     queue_mapping,
   6730 							     2, target_size));
   6731 		}
   6732 		break;
   6733 
   6734 	case offsetof(struct __sk_buff, vlan_present):
   6735 		*target_size = 1;
   6736 		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
   6737 				      PKT_VLAN_PRESENT_OFFSET());
   6738 		if (PKT_VLAN_PRESENT_BIT)
   6739 			*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT);
   6740 		if (PKT_VLAN_PRESENT_BIT < 7)
   6741 			*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
   6742 		break;
   6743 
   6744 	case offsetof(struct __sk_buff, vlan_tci):
   6745 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
   6746 				      bpf_target_off(struct sk_buff, vlan_tci, 2,
   6747 						     target_size));
   6748 		break;
   6749 
   6750 	case offsetof(struct __sk_buff, cb[0]) ...
   6751 	     offsetofend(struct __sk_buff, cb[4]) - 1:
   6752 		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
   6753 		BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
   6754 			      offsetof(struct qdisc_skb_cb, data)) %
   6755 			     sizeof(__u64));
   6756 
   6757 		prog->cb_access = 1;
   6758 		off  = si->off;
   6759 		off -= offsetof(struct __sk_buff, cb[0]);
   6760 		off += offsetof(struct sk_buff, cb);
   6761 		off += offsetof(struct qdisc_skb_cb, data);
   6762 		if (type == BPF_WRITE)
   6763 			*insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
   6764 					      si->src_reg, off);
   6765 		else
   6766 			*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
   6767 					      si->src_reg, off);
   6768 		break;
   6769 
   6770 	case offsetof(struct __sk_buff, tc_classid):
   6771 		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2);
   6772 
   6773 		off  = si->off;
   6774 		off -= offsetof(struct __sk_buff, tc_classid);
   6775 		off += offsetof(struct sk_buff, cb);
   6776 		off += offsetof(struct qdisc_skb_cb, tc_classid);
   6777 		*target_size = 2;
   6778 		if (type == BPF_WRITE)
   6779 			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
   6780 					      si->src_reg, off);
   6781 		else
   6782 			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
   6783 					      si->src_reg, off);
   6784 		break;
   6785 
   6786 	case offsetof(struct __sk_buff, data):
   6787 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
   6788 				      si->dst_reg, si->src_reg,
   6789 				      offsetof(struct sk_buff, data));
   6790 		break;
   6791 
   6792 	case offsetof(struct __sk_buff, data_meta):
   6793 		off  = si->off;
   6794 		off -= offsetof(struct __sk_buff, data_meta);
   6795 		off += offsetof(struct sk_buff, cb);
   6796 		off += offsetof(struct bpf_skb_data_end, data_meta);
   6797 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
   6798 				      si->src_reg, off);
   6799 		break;
   6800 
   6801 	case offsetof(struct __sk_buff, data_end):
   6802 		off  = si->off;
   6803 		off -= offsetof(struct __sk_buff, data_end);
   6804 		off += offsetof(struct sk_buff, cb);
   6805 		off += offsetof(struct bpf_skb_data_end, data_end);
   6806 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
   6807 				      si->src_reg, off);
   6808 		break;
   6809 
   6810 	case offsetof(struct __sk_buff, tc_index):
   6811 #ifdef CONFIG_NET_SCHED
   6812 		if (type == BPF_WRITE)
   6813 			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
   6814 					      bpf_target_off(struct sk_buff, tc_index, 2,
   6815 							     target_size));
   6816 		else
   6817 			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
   6818 					      bpf_target_off(struct sk_buff, tc_index, 2,
   6819 							     target_size));
   6820 #else
   6821 		*target_size = 2;
   6822 		if (type == BPF_WRITE)
   6823 			*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
   6824 		else
   6825 			*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
   6826 #endif
   6827 		break;
   6828 
   6829 	case offsetof(struct __sk_buff, napi_id):
   6830 #if defined(CONFIG_NET_RX_BUSY_POLL)
   6831 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   6832 				      bpf_target_off(struct sk_buff, napi_id, 4,
   6833 						     target_size));
   6834 		*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
   6835 		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
   6836 #else
   6837 		*target_size = 4;
   6838 		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
   6839 #endif
   6840 		break;
   6841 	case offsetof(struct __sk_buff, family):
   6842 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
   6843 
   6844 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
   6845 				      si->dst_reg, si->src_reg,
   6846 				      offsetof(struct sk_buff, sk));
   6847 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
   6848 				      bpf_target_off(struct sock_common,
   6849 						     skc_family,
   6850 						     2, target_size));
   6851 		break;
   6852 	case offsetof(struct __sk_buff, remote_ip4):
   6853 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
   6854 
   6855 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
   6856 				      si->dst_reg, si->src_reg,
   6857 				      offsetof(struct sk_buff, sk));
   6858 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   6859 				      bpf_target_off(struct sock_common,
   6860 						     skc_daddr,
   6861 						     4, target_size));
   6862 		break;
   6863 	case offsetof(struct __sk_buff, local_ip4):
   6864 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
   6865 					  skc_rcv_saddr) != 4);
   6866 
   6867 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
   6868 				      si->dst_reg, si->src_reg,
   6869 				      offsetof(struct sk_buff, sk));
   6870 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   6871 				      bpf_target_off(struct sock_common,
   6872 						     skc_rcv_saddr,
   6873 						     4, target_size));
   6874 		break;
   6875 	case offsetof(struct __sk_buff, remote_ip6[0]) ...
   6876 	     offsetof(struct __sk_buff, remote_ip6[3]):
   6877 #if IS_ENABLED(CONFIG_IPV6)
   6878 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
   6879 					  skc_v6_daddr.s6_addr32[0]) != 4);
   6880 
   6881 		off = si->off;
   6882 		off -= offsetof(struct __sk_buff, remote_ip6[0]);
   6883 
   6884 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
   6885 				      si->dst_reg, si->src_reg,
   6886 				      offsetof(struct sk_buff, sk));
   6887 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   6888 				      offsetof(struct sock_common,
   6889 					       skc_v6_daddr.s6_addr32[0]) +
   6890 				      off);
   6891 #else
   6892 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
   6893 #endif
   6894 		break;
   6895 	case offsetof(struct __sk_buff, local_ip6[0]) ...
   6896 	     offsetof(struct __sk_buff, local_ip6[3]):
   6897 #if IS_ENABLED(CONFIG_IPV6)
   6898 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
   6899 					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);
   6900 
   6901 		off = si->off;
   6902 		off -= offsetof(struct __sk_buff, local_ip6[0]);
   6903 
   6904 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
   6905 				      si->dst_reg, si->src_reg,
   6906 				      offsetof(struct sk_buff, sk));
   6907 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   6908 				      offsetof(struct sock_common,
   6909 					       skc_v6_rcv_saddr.s6_addr32[0]) +
   6910 				      off);
   6911 #else
   6912 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
   6913 #endif
   6914 		break;
   6915 
   6916 	case offsetof(struct __sk_buff, remote_port):
   6917 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
   6918 
   6919 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
   6920 				      si->dst_reg, si->src_reg,
   6921 				      offsetof(struct sk_buff, sk));
   6922 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
   6923 				      bpf_target_off(struct sock_common,
   6924 						     skc_dport,
   6925 						     2, target_size));
   6926 #ifndef __BIG_ENDIAN_BITFIELD
   6927 		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
   6928 #endif
   6929 		break;
   6930 
   6931 	case offsetof(struct __sk_buff, local_port):
   6932 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
   6933 
   6934 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
   6935 				      si->dst_reg, si->src_reg,
   6936 				      offsetof(struct sk_buff, sk));
   6937 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
   6938 				      bpf_target_off(struct sock_common,
   6939 						     skc_num, 2, target_size));
   6940 		break;
   6941 
   6942 	case offsetof(struct __sk_buff, flow_keys):
   6943 		off  = si->off;
   6944 		off -= offsetof(struct __sk_buff, flow_keys);
   6945 		off += offsetof(struct sk_buff, cb);
   6946 		off += offsetof(struct qdisc_skb_cb, flow_keys);
   6947 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
   6948 				      si->src_reg, off);
   6949 		break;
   6950 
   6951 	case offsetof(struct __sk_buff, tstamp):
   6952 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
   6953 
   6954 		if (type == BPF_WRITE)
   6955 			*insn++ = BPF_STX_MEM(BPF_DW,
   6956 					      si->dst_reg, si->src_reg,
   6957 					      bpf_target_off(struct sk_buff,
   6958 							     tstamp, 8,
   6959 							     target_size));
   6960 		else
   6961 			*insn++ = BPF_LDX_MEM(BPF_DW,
   6962 					      si->dst_reg, si->src_reg,
   6963 					      bpf_target_off(struct sk_buff,
   6964 							     tstamp, 8,
   6965 							     target_size));
   6966 		break;
   6967 
   6968 	case offsetof(struct __sk_buff, gso_segs):
   6969 		/* si->dst_reg = skb_shinfo(SKB); */
   6970 #ifdef NET_SKBUFF_DATA_USES_OFFSET
   6971 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
   6972 				      si->dst_reg, si->src_reg,
   6973 				      offsetof(struct sk_buff, head));
   6974 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
   6975 				      BPF_REG_AX, si->src_reg,
   6976 				      offsetof(struct sk_buff, end));
   6977 		*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
   6978 #else
   6979 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
   6980 				      si->dst_reg, si->src_reg,
   6981 				      offsetof(struct sk_buff, end));
   6982 #endif
   6983 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
   6984 				      si->dst_reg, si->dst_reg,
   6985 				      bpf_target_off(struct skb_shared_info,
   6986 						     gso_segs, 2,
   6987 						     target_size));
   6988 		break;
   6989 	case offsetof(struct __sk_buff, wire_len):
   6990 		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
   6991 
   6992 		off = si->off;
   6993 		off -= offsetof(struct __sk_buff, wire_len);
   6994 		off += offsetof(struct sk_buff, cb);
   6995 		off += offsetof(struct qdisc_skb_cb, pkt_len);
   6996 		*target_size = 4;
   6997 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
   6998 		break;
   6999 
   7000 	case offsetof(struct __sk_buff, sk):
   7001 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
   7002 				      si->dst_reg, si->src_reg,
   7003 				      offsetof(struct sk_buff, sk));
   7004 		break;
   7005 	}
   7006 
   7007 	return insn - insn_buf;
   7008 }
   7009 
   7010 u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
   7011 				const struct bpf_insn *si,
   7012 				struct bpf_insn *insn_buf,
   7013 				struct bpf_prog *prog, u32 *target_size)
   7014 {
   7015 	struct bpf_insn *insn = insn_buf;
   7016 	int off;
   7017 
   7018 	switch (si->off) {
   7019 	case offsetof(struct bpf_sock, bound_dev_if):
   7020 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
   7021 
   7022 		if (type == BPF_WRITE)
   7023 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
   7024 					offsetof(struct sock, sk_bound_dev_if));
   7025 		else
   7026 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   7027 				      offsetof(struct sock, sk_bound_dev_if));
   7028 		break;
   7029 
   7030 	case offsetof(struct bpf_sock, mark):
   7031 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
   7032 
   7033 		if (type == BPF_WRITE)
   7034 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
   7035 					offsetof(struct sock, sk_mark));
   7036 		else
   7037 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   7038 				      offsetof(struct sock, sk_mark));
   7039 		break;
   7040 
   7041 	case offsetof(struct bpf_sock, priority):
   7042 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
   7043 
   7044 		if (type == BPF_WRITE)
   7045 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
   7046 					offsetof(struct sock, sk_priority));
   7047 		else
   7048 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   7049 				      offsetof(struct sock, sk_priority));
   7050 		break;
   7051 
   7052 	case offsetof(struct bpf_sock, family):
   7053 		*insn++ = BPF_LDX_MEM(
   7054 			BPF_FIELD_SIZEOF(struct sock_common, skc_family),
   7055 			si->dst_reg, si->src_reg,
   7056 			bpf_target_off(struct sock_common,
   7057 				       skc_family,
   7058 				       FIELD_SIZEOF(struct sock_common,
   7059 						    skc_family),
   7060 				       target_size));
   7061 		break;
   7062 
   7063 	case offsetof(struct bpf_sock, type):
   7064 		BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
   7065 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   7066 				      offsetof(struct sock, __sk_flags_offset));
   7067 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
   7068 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
   7069 		*target_size = 2;
   7070 		break;
   7071 
   7072 	case offsetof(struct bpf_sock, protocol):
   7073 		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
   7074 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   7075 				      offsetof(struct sock, __sk_flags_offset));
   7076 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
   7077 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
   7078 		*target_size = 1;
   7079 		break;
   7080 
   7081 	case offsetof(struct bpf_sock, src_ip4):
   7082 		*insn++ = BPF_LDX_MEM(
   7083 			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
   7084 			bpf_target_off(struct sock_common, skc_rcv_saddr,
   7085 				       FIELD_SIZEOF(struct sock_common,
   7086 						    skc_rcv_saddr),
   7087 				       target_size));
   7088 		break;
   7089 
   7090 	case offsetof(struct bpf_sock, dst_ip4):
   7091 		*insn++ = BPF_LDX_MEM(
   7092 			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
   7093 			bpf_target_off(struct sock_common, skc_daddr,
   7094 				       FIELD_SIZEOF(struct sock_common,
   7095 						    skc_daddr),
   7096 				       target_size));
   7097 		break;
   7098 
   7099 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
   7100 #if IS_ENABLED(CONFIG_IPV6)
   7101 		off = si->off;
   7102 		off -= offsetof(struct bpf_sock, src_ip6[0]);
   7103 		*insn++ = BPF_LDX_MEM(
   7104 			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
   7105 			bpf_target_off(
   7106 				struct sock_common,
   7107 				skc_v6_rcv_saddr.s6_addr32[0],
   7108 				FIELD_SIZEOF(struct sock_common,
   7109 					     skc_v6_rcv_saddr.s6_addr32[0]),
   7110 				target_size) + off);
   7111 #else
   7112 		(void)off;
   7113 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
   7114 #endif
   7115 		break;
   7116 
   7117 	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
   7118 #if IS_ENABLED(CONFIG_IPV6)
   7119 		off = si->off;
   7120 		off -= offsetof(struct bpf_sock, dst_ip6[0]);
   7121 		*insn++ = BPF_LDX_MEM(
   7122 			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
   7123 			bpf_target_off(struct sock_common,
   7124 				       skc_v6_daddr.s6_addr32[0],
   7125 				       FIELD_SIZEOF(struct sock_common,
   7126 						    skc_v6_daddr.s6_addr32[0]),
   7127 				       target_size) + off);
   7128 #else
   7129 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
   7130 		*target_size = 4;
   7131 #endif
   7132 		break;
   7133 
   7134 	case offsetof(struct bpf_sock, src_port):
   7135 		*insn++ = BPF_LDX_MEM(
   7136 			BPF_FIELD_SIZEOF(struct sock_common, skc_num),
   7137 			si->dst_reg, si->src_reg,
   7138 			bpf_target_off(struct sock_common, skc_num,
   7139 				       FIELD_SIZEOF(struct sock_common,
   7140 						    skc_num),
   7141 				       target_size));
   7142 		break;
   7143 
   7144 	case offsetof(struct bpf_sock, dst_port):
   7145 		*insn++ = BPF_LDX_MEM(
   7146 			BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
   7147 			si->dst_reg, si->src_reg,
   7148 			bpf_target_off(struct sock_common, skc_dport,
   7149 				       FIELD_SIZEOF(struct sock_common,
   7150 						    skc_dport),
   7151 				       target_size));
   7152 		break;
   7153 
   7154 	case offsetof(struct bpf_sock, state):
   7155 		*insn++ = BPF_LDX_MEM(
   7156 			BPF_FIELD_SIZEOF(struct sock_common, skc_state),
   7157 			si->dst_reg, si->src_reg,
   7158 			bpf_target_off(struct sock_common, skc_state,
   7159 				       FIELD_SIZEOF(struct sock_common,
   7160 						    skc_state),
   7161 				       target_size));
   7162 		break;
   7163 	}
   7164 
   7165 	return insn - insn_buf;
   7166 }
   7167 
   7168 static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
   7169 					 const struct bpf_insn *si,
   7170 					 struct bpf_insn *insn_buf,
   7171 					 struct bpf_prog *prog, u32 *target_size)
   7172 {
   7173 	struct bpf_insn *insn = insn_buf;
   7174 
   7175 	switch (si->off) {
   7176 	case offsetof(struct __sk_buff, ifindex):
   7177 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
   7178 				      si->dst_reg, si->src_reg,
   7179 				      offsetof(struct sk_buff, dev));
   7180 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   7181 				      bpf_target_off(struct net_device, ifindex, 4,
   7182 						     target_size));
   7183 		break;
   7184 	default:
   7185 		return bpf_convert_ctx_access(type, si, insn_buf, prog,
   7186 					      target_size);
   7187 	}
   7188 
   7189 	return insn - insn_buf;
   7190 }
   7191 
   7192 static u32 xdp_convert_ctx_access(enum bpf_access_type type,
   7193 				  const struct bpf_insn *si,
   7194 				  struct bpf_insn *insn_buf,
   7195 				  struct bpf_prog *prog, u32 *target_size)
   7196 {
   7197 	struct bpf_insn *insn = insn_buf;
   7198 
   7199 	switch (si->off) {
   7200 	case offsetof(struct xdp_md, data):
   7201 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
   7202 				      si->dst_reg, si->src_reg,
   7203 				      offsetof(struct xdp_buff, data));
   7204 		break;
   7205 	case offsetof(struct xdp_md, data_meta):
   7206 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
   7207 				      si->dst_reg, si->src_reg,
   7208 				      offsetof(struct xdp_buff, data_meta));
   7209 		break;
   7210 	case offsetof(struct xdp_md, data_end):
   7211 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
   7212 				      si->dst_reg, si->src_reg,
   7213 				      offsetof(struct xdp_buff, data_end));
   7214 		break;
   7215 	case offsetof(struct xdp_md, ingress_ifindex):
   7216 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
   7217 				      si->dst_reg, si->src_reg,
   7218 				      offsetof(struct xdp_buff, rxq));
   7219 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
   7220 				      si->dst_reg, si->dst_reg,
   7221 				      offsetof(struct xdp_rxq_info, dev));
   7222 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   7223 				      offsetof(struct net_device, ifindex));
   7224 		break;
   7225 	case offsetof(struct xdp_md, rx_queue_index):
   7226 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
   7227 				      si->dst_reg, si->src_reg,
   7228 				      offsetof(struct xdp_buff, rxq));
   7229 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   7230 				      offsetof(struct xdp_rxq_info,
   7231 					       queue_index));
   7232 		break;
   7233 	}
   7234 
   7235 	return insn - insn_buf;
   7236 }
   7237 
    7238 /* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF, where S is the type
    7239  * of the context Structure, F is the Field in the context structure that holds
    7240  * a pointer to the Nested Structure of type NS that has the field NF.
    7241  *
    7242  * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to the caller to make
    7243  * sure that SIZE is not greater than the actual size of S.F.NF.
    7244  *
    7245  * If an offset OFF is provided, the load happens from that offset relative to
    7246  * the offset of NF.
    7247  */
   7248 #define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF)	       \
   7249 	do {								       \
   7250 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg,     \
   7251 				      si->src_reg, offsetof(S, F));	       \
   7252 		*insn++ = BPF_LDX_MEM(					       \
   7253 			SIZE, si->dst_reg, si->dst_reg,			       \
   7254 			bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF),	       \
   7255 				       target_size)			       \
   7256 				+ OFF);					       \
   7257 	} while (0)
   7258 
   7259 #define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF)			       \
   7260 	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF,		       \
   7261 					     BPF_FIELD_SIZEOF(NS, NF), 0)
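
         /* Illustrative sketch only, in C-like pseudocode rather than the exact
          * BPF instructions that get emitted: for the user_family case below,
          *
          *	SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
          *				    struct sockaddr, uaddr, sa_family)
          *
          * boils down to two dependent loads (with the context pointer held in
          * src_reg):
          *
          *	dst_reg = ((struct bpf_sock_addr_kern *)src_reg)->uaddr;
          *	dst_reg = ((struct sockaddr *)dst_reg)->sa_family;
          */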
   7262 
    7263 /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantics similar to
    7264  * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for the store operation.
    7265  *
    7266  * It doesn't support a SIZE argument, though, since narrow stores are not
    7267  * supported for now.
    7268  *
    7269  * In addition it uses the Temporary Field TF (a member of struct S) as a 3rd
    7270  * "register", since the two registers available in convert_ctx_access are not
    7271  * enough: we can override neither SRC, since it contains the value to store,
    7272  * nor DST, since it contains the pointer to the context that may be used by
    7273  * later instructions. But we need a temporary place to save the pointer to the
    7274  * nested structure whose field we want to store to.
    7275  */
   7276 #define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF)		       \
   7277 	do {								       \
   7278 		int tmp_reg = BPF_REG_9;				       \
   7279 		if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)	       \
   7280 			--tmp_reg;					       \
   7281 		if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)	       \
   7282 			--tmp_reg;					       \
   7283 		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg,	       \
   7284 				      offsetof(S, TF));			       \
   7285 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg,	       \
   7286 				      si->dst_reg, offsetof(S, F));	       \
   7287 		*insn++ = BPF_STX_MEM(					       \
   7288 			BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg,	       \
   7289 			bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF),	       \
   7290 				       target_size)			       \
   7291 				+ OFF);					       \
   7292 		*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg,	       \
   7293 				      offsetof(S, TF));			       \
   7294 	} while (0)
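
         /* Illustrative sketch only (C-like pseudocode, not the exact emitted
          * instructions) of the store sequence above, where ctx is the context
          * pointer held in dst_reg and TF is the temporary field used as
          * scratch space for the borrowed register:
          *
          *	ctx->TF = tmp_reg;
          *	tmp_reg = ctx->F;
          *	((NS *)tmp_reg)->NF = src_reg;
          *	tmp_reg = ctx->TF;
          */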
   7295 
   7296 #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
   7297 						      TF)		       \
   7298 	do {								       \
   7299 		if (type == BPF_WRITE) {				       \
   7300 			SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF,    \
   7301 							 TF);		       \
   7302 		} else {						       \
   7303 			SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(		       \
   7304 				S, NS, F, NF, SIZE, OFF);  \
   7305 		}							       \
   7306 	} while (0)
   7307 
   7308 #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF)		       \
   7309 	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(			       \
   7310 		S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
   7311 
   7312 static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
   7313 					const struct bpf_insn *si,
   7314 					struct bpf_insn *insn_buf,
   7315 					struct bpf_prog *prog, u32 *target_size)
   7316 {
   7317 	struct bpf_insn *insn = insn_buf;
   7318 	int off;
   7319 
   7320 	switch (si->off) {
   7321 	case offsetof(struct bpf_sock_addr, user_family):
   7322 		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
   7323 					    struct sockaddr, uaddr, sa_family);
   7324 		break;
   7325 
   7326 	case offsetof(struct bpf_sock_addr, user_ip4):
   7327 		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
   7328 			struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
   7329 			sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
   7330 		break;
   7331 
   7332 	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
   7333 		off = si->off;
   7334 		off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
   7335 		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
   7336 			struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
   7337 			sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
   7338 			tmp_reg);
   7339 		break;
   7340 
   7341 	case offsetof(struct bpf_sock_addr, user_port):
    7342 		/* To get the port we would need to know sa_family first and
    7343 		 * then treat sockaddr as either sockaddr_in or sockaddr_in6.
    7344 		 * We can simplify, though, since the port field has the same
    7345 		 * offset and size in both structures.
    7346 		 * Here we check this invariant and, if it holds, use just one
    7347 		 * of the structures.
    7348 		 */
   7349 		BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
   7350 			     offsetof(struct sockaddr_in6, sin6_port));
   7351 		BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
   7352 			     FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
   7353 		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
   7354 						     struct sockaddr_in6, uaddr,
   7355 						     sin6_port, tmp_reg);
   7356 		break;
   7357 
   7358 	case offsetof(struct bpf_sock_addr, family):
   7359 		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
   7360 					    struct sock, sk, sk_family);
   7361 		break;
   7362 
   7363 	case offsetof(struct bpf_sock_addr, type):
   7364 		SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
   7365 			struct bpf_sock_addr_kern, struct sock, sk,
   7366 			__sk_flags_offset, BPF_W, 0);
   7367 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
   7368 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
   7369 		break;
   7370 
   7371 	case offsetof(struct bpf_sock_addr, protocol):
   7372 		SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
   7373 			struct bpf_sock_addr_kern, struct sock, sk,
   7374 			__sk_flags_offset, BPF_W, 0);
   7375 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
   7376 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
   7377 					SK_FL_PROTO_SHIFT);
   7378 		break;
   7379 
   7380 	case offsetof(struct bpf_sock_addr, msg_src_ip4):
   7381 		/* Treat t_ctx as struct in_addr for msg_src_ip4. */
   7382 		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
   7383 			struct bpf_sock_addr_kern, struct in_addr, t_ctx,
   7384 			s_addr, BPF_SIZE(si->code), 0, tmp_reg);
   7385 		break;
   7386 
   7387 	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
   7388 				msg_src_ip6[3]):
   7389 		off = si->off;
   7390 		off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]);
   7391 		/* Treat t_ctx as struct in6_addr for msg_src_ip6. */
   7392 		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
   7393 			struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
   7394 			s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
   7395 		break;
   7396 	}
   7397 
   7398 	return insn - insn_buf;
   7399 }
   7400 
   7401 static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
   7402 				       const struct bpf_insn *si,
   7403 				       struct bpf_insn *insn_buf,
   7404 				       struct bpf_prog *prog,
   7405 				       u32 *target_size)
   7406 {
   7407 	struct bpf_insn *insn = insn_buf;
   7408 	int off;
   7409 
   7410 /* Helper macro for adding read access to tcp_sock or sock fields. */
   7411 #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
   7412 	do {								      \
   7413 		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >		      \
   7414 			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
   7415 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
   7416 						struct bpf_sock_ops_kern,     \
   7417 						is_fullsock),		      \
   7418 				      si->dst_reg, si->src_reg,		      \
   7419 				      offsetof(struct bpf_sock_ops_kern,      \
   7420 					       is_fullsock));		      \
   7421 		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2);	      \
   7422 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
   7423 						struct bpf_sock_ops_kern, sk),\
   7424 				      si->dst_reg, si->src_reg,		      \
   7425 				      offsetof(struct bpf_sock_ops_kern, sk));\
   7426 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ,		      \
   7427 						       OBJ_FIELD),	      \
   7428 				      si->dst_reg, si->dst_reg,		      \
   7429 				      offsetof(OBJ, OBJ_FIELD));	      \
   7430 	} while (0)
   7431 
   7432 #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
   7433 		SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
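
         /* Illustrative pseudocode only: for the bpf_sock_ops_cb_flags case
          * further below, SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags,
          * bpf_sock_ops_cb_flags, struct tcp_sock) reads roughly as (with skops
          * standing for the struct bpf_sock_ops_kern pointer held in src_reg):
          *
          *	dst_reg = skops->is_fullsock;
          *	if (dst_reg) {
          *		dst_reg = skops->sk;
          *		dst_reg = ((struct tcp_sock *)dst_reg)->bpf_sock_ops_cb_flags;
          *	}
          */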
   7434 
   7435 /* Helper macro for adding write access to tcp_sock or sock fields.
   7436  * The macro is called with two registers, dst_reg which contains a pointer
   7437  * to ctx (context) and src_reg which contains the value that should be
   7438  * stored. However, we need an additional register since we cannot overwrite
   7439  * dst_reg because it may be used later in the program.
    7440  * Instead we "borrow" one of the other registers. We first save its value
   7441  * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
   7442  * it at the end of the macro.
   7443  */
   7444 #define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
   7445 	do {								      \
   7446 		int reg = BPF_REG_9;					      \
   7447 		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >		      \
   7448 			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
   7449 		if (si->dst_reg == reg || si->src_reg == reg)		      \
   7450 			reg--;						      \
   7451 		if (si->dst_reg == reg || si->src_reg == reg)		      \
   7452 			reg--;						      \
   7453 		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg,		      \
   7454 				      offsetof(struct bpf_sock_ops_kern,      \
   7455 					       temp));			      \
   7456 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
   7457 						struct bpf_sock_ops_kern,     \
   7458 						is_fullsock),		      \
   7459 				      reg, si->dst_reg,			      \
   7460 				      offsetof(struct bpf_sock_ops_kern,      \
   7461 					       is_fullsock));		      \
   7462 		*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);		      \
   7463 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
   7464 						struct bpf_sock_ops_kern, sk),\
   7465 				      reg, si->dst_reg,			      \
   7466 				      offsetof(struct bpf_sock_ops_kern, sk));\
   7467 		*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD),	      \
   7468 				      reg, si->src_reg,			      \
   7469 				      offsetof(OBJ, OBJ_FIELD));	      \
   7470 		*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg,		      \
   7471 				      offsetof(struct bpf_sock_ops_kern,      \
   7472 					       temp));			      \
   7473 	} while (0)
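
         /* Illustrative pseudocode only: for the sk_txhash write path further
          * below, SOCK_OPS_SET_FIELD(sk_txhash, sk_txhash, struct sock) is
          * roughly (with skops standing for the struct bpf_sock_ops_kern
          * pointer held in dst_reg and reg being the borrowed register):
          *
          *	skops->temp = reg;
          *	reg = skops->is_fullsock;
          *	if (reg) {
          *		reg = skops->sk;
          *		((struct sock *)reg)->sk_txhash = src_reg;
          *	}
          *	reg = skops->temp;
          */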
   7474 
   7475 #define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE)	      \
   7476 	do {								      \
   7477 		if (TYPE == BPF_WRITE)					      \
   7478 			SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	      \
   7479 		else							      \
   7480 			SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	      \
   7481 	} while (0)
   7482 
   7483 	CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops,
   7484 				       SOCK_OPS_GET_TCP_SOCK_FIELD);
   7485 
   7486 	if (insn > insn_buf)
   7487 		return insn - insn_buf;
   7488 
   7489 	switch (si->off) {
   7490 	case offsetof(struct bpf_sock_ops, op) ...
   7491 	     offsetof(struct bpf_sock_ops, replylong[3]):
   7492 		BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
   7493 			     FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
   7494 		BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
   7495 			     FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
   7496 		BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
   7497 			     FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
   7498 		off = si->off;
   7499 		off -= offsetof(struct bpf_sock_ops, op);
   7500 		off += offsetof(struct bpf_sock_ops_kern, op);
   7501 		if (type == BPF_WRITE)
   7502 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
   7503 					      off);
   7504 		else
   7505 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
   7506 					      off);
   7507 		break;
   7508 
   7509 	case offsetof(struct bpf_sock_ops, family):
   7510 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
   7511 
   7512 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7513 					      struct bpf_sock_ops_kern, sk),
   7514 				      si->dst_reg, si->src_reg,
   7515 				      offsetof(struct bpf_sock_ops_kern, sk));
   7516 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
   7517 				      offsetof(struct sock_common, skc_family));
   7518 		break;
   7519 
   7520 	case offsetof(struct bpf_sock_ops, remote_ip4):
   7521 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
   7522 
   7523 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7524 						struct bpf_sock_ops_kern, sk),
   7525 				      si->dst_reg, si->src_reg,
   7526 				      offsetof(struct bpf_sock_ops_kern, sk));
   7527 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   7528 				      offsetof(struct sock_common, skc_daddr));
   7529 		break;
   7530 
   7531 	case offsetof(struct bpf_sock_ops, local_ip4):
   7532 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
   7533 					  skc_rcv_saddr) != 4);
   7534 
   7535 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7536 					      struct bpf_sock_ops_kern, sk),
   7537 				      si->dst_reg, si->src_reg,
   7538 				      offsetof(struct bpf_sock_ops_kern, sk));
   7539 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   7540 				      offsetof(struct sock_common,
   7541 					       skc_rcv_saddr));
   7542 		break;
   7543 
   7544 	case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
   7545 	     offsetof(struct bpf_sock_ops, remote_ip6[3]):
   7546 #if IS_ENABLED(CONFIG_IPV6)
   7547 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
   7548 					  skc_v6_daddr.s6_addr32[0]) != 4);
   7549 
   7550 		off = si->off;
   7551 		off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
   7552 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7553 						struct bpf_sock_ops_kern, sk),
   7554 				      si->dst_reg, si->src_reg,
   7555 				      offsetof(struct bpf_sock_ops_kern, sk));
   7556 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   7557 				      offsetof(struct sock_common,
   7558 					       skc_v6_daddr.s6_addr32[0]) +
   7559 				      off);
   7560 #else
   7561 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
   7562 #endif
   7563 		break;
   7564 
   7565 	case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
   7566 	     offsetof(struct bpf_sock_ops, local_ip6[3]):
   7567 #if IS_ENABLED(CONFIG_IPV6)
   7568 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
   7569 					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);
   7570 
   7571 		off = si->off;
   7572 		off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
   7573 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7574 						struct bpf_sock_ops_kern, sk),
   7575 				      si->dst_reg, si->src_reg,
   7576 				      offsetof(struct bpf_sock_ops_kern, sk));
   7577 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   7578 				      offsetof(struct sock_common,
   7579 					       skc_v6_rcv_saddr.s6_addr32[0]) +
   7580 				      off);
   7581 #else
   7582 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
   7583 #endif
   7584 		break;
   7585 
   7586 	case offsetof(struct bpf_sock_ops, remote_port):
   7587 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
   7588 
   7589 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7590 						struct bpf_sock_ops_kern, sk),
   7591 				      si->dst_reg, si->src_reg,
   7592 				      offsetof(struct bpf_sock_ops_kern, sk));
   7593 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
   7594 				      offsetof(struct sock_common, skc_dport));
   7595 #ifndef __BIG_ENDIAN_BITFIELD
   7596 		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
   7597 #endif
   7598 		break;
   7599 
   7600 	case offsetof(struct bpf_sock_ops, local_port):
   7601 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
   7602 
   7603 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7604 						struct bpf_sock_ops_kern, sk),
   7605 				      si->dst_reg, si->src_reg,
   7606 				      offsetof(struct bpf_sock_ops_kern, sk));
   7607 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
   7608 				      offsetof(struct sock_common, skc_num));
   7609 		break;
   7610 
   7611 	case offsetof(struct bpf_sock_ops, is_fullsock):
   7612 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7613 						struct bpf_sock_ops_kern,
   7614 						is_fullsock),
   7615 				      si->dst_reg, si->src_reg,
   7616 				      offsetof(struct bpf_sock_ops_kern,
   7617 					       is_fullsock));
   7618 		break;
   7619 
   7620 	case offsetof(struct bpf_sock_ops, state):
   7621 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
   7622 
   7623 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7624 						struct bpf_sock_ops_kern, sk),
   7625 				      si->dst_reg, si->src_reg,
   7626 				      offsetof(struct bpf_sock_ops_kern, sk));
   7627 		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
   7628 				      offsetof(struct sock_common, skc_state));
   7629 		break;
   7630 
   7631 	case offsetof(struct bpf_sock_ops, rtt_min):
   7632 		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
   7633 			     sizeof(struct minmax));
   7634 		BUILD_BUG_ON(sizeof(struct minmax) <
   7635 			     sizeof(struct minmax_sample));
   7636 
   7637 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7638 						struct bpf_sock_ops_kern, sk),
   7639 				      si->dst_reg, si->src_reg,
   7640 				      offsetof(struct bpf_sock_ops_kern, sk));
   7641 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   7642 				      offsetof(struct tcp_sock, rtt_min) +
   7643 				      FIELD_SIZEOF(struct minmax_sample, t));
   7644 		break;
   7645 
   7646 	case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
   7647 		SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
   7648 				   struct tcp_sock);
   7649 		break;
   7650 
   7651 	case offsetof(struct bpf_sock_ops, sk_txhash):
   7652 		SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
   7653 					  struct sock, type);
   7654 		break;
   7655 	}
   7656 	return insn - insn_buf;
   7657 }
   7658 
   7659 static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
   7660 				     const struct bpf_insn *si,
   7661 				     struct bpf_insn *insn_buf,
   7662 				     struct bpf_prog *prog, u32 *target_size)
   7663 {
   7664 	struct bpf_insn *insn = insn_buf;
   7665 	int off;
   7666 
   7667 	switch (si->off) {
   7668 	case offsetof(struct __sk_buff, data_end):
   7669 		off  = si->off;
   7670 		off -= offsetof(struct __sk_buff, data_end);
   7671 		off += offsetof(struct sk_buff, cb);
   7672 		off += offsetof(struct tcp_skb_cb, bpf.data_end);
   7673 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
   7674 				      si->src_reg, off);
   7675 		break;
   7676 	default:
   7677 		return bpf_convert_ctx_access(type, si, insn_buf, prog,
   7678 					      target_size);
   7679 	}
   7680 
   7681 	return insn - insn_buf;
   7682 }
   7683 
   7684 static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
   7685 				     const struct bpf_insn *si,
   7686 				     struct bpf_insn *insn_buf,
   7687 				     struct bpf_prog *prog, u32 *target_size)
   7688 {
   7689 	struct bpf_insn *insn = insn_buf;
   7690 #if IS_ENABLED(CONFIG_IPV6)
   7691 	int off;
   7692 #endif
   7693 
    7694 	/* convert ctx uses the fact that the sg element is first in the struct */
   7695 	BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0);
   7696 
   7697 	switch (si->off) {
   7698 	case offsetof(struct sk_msg_md, data):
   7699 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
   7700 				      si->dst_reg, si->src_reg,
   7701 				      offsetof(struct sk_msg, data));
   7702 		break;
   7703 	case offsetof(struct sk_msg_md, data_end):
   7704 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end),
   7705 				      si->dst_reg, si->src_reg,
   7706 				      offsetof(struct sk_msg, data_end));
   7707 		break;
   7708 	case offsetof(struct sk_msg_md, family):
   7709 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
   7710 
   7711 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7712 					      struct sk_msg, sk),
   7713 				      si->dst_reg, si->src_reg,
   7714 				      offsetof(struct sk_msg, sk));
   7715 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
   7716 				      offsetof(struct sock_common, skc_family));
   7717 		break;
   7718 
   7719 	case offsetof(struct sk_msg_md, remote_ip4):
   7720 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
   7721 
   7722 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7723 						struct sk_msg, sk),
   7724 				      si->dst_reg, si->src_reg,
   7725 				      offsetof(struct sk_msg, sk));
   7726 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   7727 				      offsetof(struct sock_common, skc_daddr));
   7728 		break;
   7729 
   7730 	case offsetof(struct sk_msg_md, local_ip4):
   7731 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
   7732 					  skc_rcv_saddr) != 4);
   7733 
   7734 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7735 					      struct sk_msg, sk),
   7736 				      si->dst_reg, si->src_reg,
   7737 				      offsetof(struct sk_msg, sk));
   7738 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   7739 				      offsetof(struct sock_common,
   7740 					       skc_rcv_saddr));
   7741 		break;
   7742 
   7743 	case offsetof(struct sk_msg_md, remote_ip6[0]) ...
   7744 	     offsetof(struct sk_msg_md, remote_ip6[3]):
   7745 #if IS_ENABLED(CONFIG_IPV6)
   7746 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
   7747 					  skc_v6_daddr.s6_addr32[0]) != 4);
   7748 
   7749 		off = si->off;
   7750 		off -= offsetof(struct sk_msg_md, remote_ip6[0]);
   7751 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7752 						struct sk_msg, sk),
   7753 				      si->dst_reg, si->src_reg,
   7754 				      offsetof(struct sk_msg, sk));
   7755 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   7756 				      offsetof(struct sock_common,
   7757 					       skc_v6_daddr.s6_addr32[0]) +
   7758 				      off);
   7759 #else
   7760 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
   7761 #endif
   7762 		break;
   7763 
   7764 	case offsetof(struct sk_msg_md, local_ip6[0]) ...
   7765 	     offsetof(struct sk_msg_md, local_ip6[3]):
   7766 #if IS_ENABLED(CONFIG_IPV6)
   7767 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
   7768 					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);
   7769 
   7770 		off = si->off;
   7771 		off -= offsetof(struct sk_msg_md, local_ip6[0]);
   7772 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7773 						struct sk_msg, sk),
   7774 				      si->dst_reg, si->src_reg,
   7775 				      offsetof(struct sk_msg, sk));
   7776 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
   7777 				      offsetof(struct sock_common,
   7778 					       skc_v6_rcv_saddr.s6_addr32[0]) +
   7779 				      off);
   7780 #else
   7781 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
   7782 #endif
   7783 		break;
   7784 
   7785 	case offsetof(struct sk_msg_md, remote_port):
   7786 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
   7787 
   7788 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7789 						struct sk_msg, sk),
   7790 				      si->dst_reg, si->src_reg,
   7791 				      offsetof(struct sk_msg, sk));
   7792 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
   7793 				      offsetof(struct sock_common, skc_dport));
   7794 #ifndef __BIG_ENDIAN_BITFIELD
   7795 		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
   7796 #endif
   7797 		break;
   7798 
   7799 	case offsetof(struct sk_msg_md, local_port):
   7800 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
   7801 
   7802 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
   7803 						struct sk_msg, sk),
   7804 				      si->dst_reg, si->src_reg,
   7805 				      offsetof(struct sk_msg, sk));
   7806 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
   7807 				      offsetof(struct sock_common, skc_num));
   7808 		break;
   7809 
   7810 	case offsetof(struct sk_msg_md, size):
   7811 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size),
   7812 				      si->dst_reg, si->src_reg,
   7813 				      offsetof(struct sk_msg_sg, size));
   7814 		break;
   7815 	}
   7816 
   7817 	return insn - insn_buf;
   7818 }
   7819 
   7820 const struct bpf_verifier_ops sk_filter_verifier_ops = {
   7821 	.get_func_proto		= sk_filter_func_proto,
   7822 	.is_valid_access	= sk_filter_is_valid_access,
   7823 	.convert_ctx_access	= bpf_convert_ctx_access,
   7824 	.gen_ld_abs		= bpf_gen_ld_abs,
   7825 };
   7826 
   7827 const struct bpf_prog_ops sk_filter_prog_ops = {
   7828 	.test_run		= bpf_prog_test_run_skb,
   7829 };
   7830 
   7831 const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
   7832 	.get_func_proto		= tc_cls_act_func_proto,
   7833 	.is_valid_access	= tc_cls_act_is_valid_access,
   7834 	.convert_ctx_access	= tc_cls_act_convert_ctx_access,
   7835 	.gen_prologue		= tc_cls_act_prologue,
   7836 	.gen_ld_abs		= bpf_gen_ld_abs,
   7837 };
   7838 
   7839 const struct bpf_prog_ops tc_cls_act_prog_ops = {
   7840 	.test_run		= bpf_prog_test_run_skb,
   7841 };
   7842 
   7843 const struct bpf_verifier_ops xdp_verifier_ops = {
   7844 	.get_func_proto		= xdp_func_proto,
   7845 	.is_valid_access	= xdp_is_valid_access,
   7846 	.convert_ctx_access	= xdp_convert_ctx_access,
   7847 	.gen_prologue		= bpf_noop_prologue,
   7848 };
   7849 
   7850 const struct bpf_prog_ops xdp_prog_ops = {
   7851 	.test_run		= bpf_prog_test_run_xdp,
   7852 };
   7853 
   7854 const struct bpf_verifier_ops cg_skb_verifier_ops = {
   7855 	.get_func_proto		= cg_skb_func_proto,
   7856 	.is_valid_access	= cg_skb_is_valid_access,
   7857 	.convert_ctx_access	= bpf_convert_ctx_access,
   7858 };
   7859 
   7860 const struct bpf_prog_ops cg_skb_prog_ops = {
   7861 	.test_run		= bpf_prog_test_run_skb,
   7862 };
   7863 
   7864 const struct bpf_verifier_ops lwt_in_verifier_ops = {
   7865 	.get_func_proto		= lwt_in_func_proto,
   7866 	.is_valid_access	= lwt_is_valid_access,
   7867 	.convert_ctx_access	= bpf_convert_ctx_access,
   7868 };
   7869 
   7870 const struct bpf_prog_ops lwt_in_prog_ops = {
   7871 	.test_run		= bpf_prog_test_run_skb,
   7872 };
   7873 
   7874 const struct bpf_verifier_ops lwt_out_verifier_ops = {
   7875 	.get_func_proto		= lwt_out_func_proto,
   7876 	.is_valid_access	= lwt_is_valid_access,
   7877 	.convert_ctx_access	= bpf_convert_ctx_access,
   7878 };
   7879 
   7880 const struct bpf_prog_ops lwt_out_prog_ops = {
   7881 	.test_run		= bpf_prog_test_run_skb,
   7882 };
   7883 
   7884 const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
   7885 	.get_func_proto		= lwt_xmit_func_proto,
   7886 	.is_valid_access	= lwt_is_valid_access,
   7887 	.convert_ctx_access	= bpf_convert_ctx_access,
   7888 	.gen_prologue		= tc_cls_act_prologue,
   7889 };
   7890 
   7891 const struct bpf_prog_ops lwt_xmit_prog_ops = {
   7892 	.test_run		= bpf_prog_test_run_skb,
   7893 };
   7894 
   7895 const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
   7896 	.get_func_proto		= lwt_seg6local_func_proto,
   7897 	.is_valid_access	= lwt_is_valid_access,
   7898 	.convert_ctx_access	= bpf_convert_ctx_access,
   7899 };
   7900 
   7901 const struct bpf_prog_ops lwt_seg6local_prog_ops = {
   7902 	.test_run		= bpf_prog_test_run_skb,
   7903 };
   7904 
   7905 const struct bpf_verifier_ops cg_sock_verifier_ops = {
   7906 	.get_func_proto		= sock_filter_func_proto,
   7907 	.is_valid_access	= sock_filter_is_valid_access,
   7908 	.convert_ctx_access	= bpf_sock_convert_ctx_access,
   7909 };
   7910 
   7911 const struct bpf_prog_ops cg_sock_prog_ops = {
   7912 };
   7913 
   7914 const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
   7915 	.get_func_proto		= sock_addr_func_proto,
   7916 	.is_valid_access	= sock_addr_is_valid_access,
   7917 	.convert_ctx_access	= sock_addr_convert_ctx_access,
   7918 };
   7919 
   7920 const struct bpf_prog_ops cg_sock_addr_prog_ops = {
   7921 };
   7922 
   7923 const struct bpf_verifier_ops sock_ops_verifier_ops = {
   7924 	.get_func_proto		= sock_ops_func_proto,
   7925 	.is_valid_access	= sock_ops_is_valid_access,
   7926 	.convert_ctx_access	= sock_ops_convert_ctx_access,
   7927 };
   7928 
   7929 const struct bpf_prog_ops sock_ops_prog_ops = {
   7930 };
   7931 
   7932 const struct bpf_verifier_ops sk_skb_verifier_ops = {
   7933 	.get_func_proto		= sk_skb_func_proto,
   7934 	.is_valid_access	= sk_skb_is_valid_access,
   7935 	.convert_ctx_access	= sk_skb_convert_ctx_access,
   7936 	.gen_prologue		= sk_skb_prologue,
   7937 };
   7938 
   7939 const struct bpf_prog_ops sk_skb_prog_ops = {
   7940 };
   7941 
   7942 const struct bpf_verifier_ops sk_msg_verifier_ops = {
   7943 	.get_func_proto		= sk_msg_func_proto,
   7944 	.is_valid_access	= sk_msg_is_valid_access,
   7945 	.convert_ctx_access	= sk_msg_convert_ctx_access,
   7946 	.gen_prologue		= bpf_noop_prologue,
   7947 };
   7948 
   7949 const struct bpf_prog_ops sk_msg_prog_ops = {
   7950 };
   7951 
   7952 const struct bpf_verifier_ops flow_dissector_verifier_ops = {
   7953 	.get_func_proto		= flow_dissector_func_proto,
   7954 	.is_valid_access	= flow_dissector_is_valid_access,
   7955 	.convert_ctx_access	= bpf_convert_ctx_access,
   7956 };
   7957 
   7958 const struct bpf_prog_ops flow_dissector_prog_ops = {
   7959 	.test_run		= bpf_prog_test_run_flow_dissector,
   7960 };
   7961 
   7962 int sk_detach_filter(struct sock *sk)
   7963 {
   7964 	int ret = -ENOENT;
   7965 	struct sk_filter *filter;
   7966 
   7967 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
   7968 		return -EPERM;
   7969 
   7970 	filter = rcu_dereference_protected(sk->sk_filter,
   7971 					   lockdep_sock_is_held(sk));
   7972 	if (filter) {
   7973 		RCU_INIT_POINTER(sk->sk_filter, NULL);
   7974 		sk_filter_uncharge(sk, filter);
   7975 		ret = 0;
   7976 	}
   7977 
   7978 	return ret;
   7979 }
   7980 EXPORT_SYMBOL_GPL(sk_detach_filter);
   7981 
   7982 int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
   7983 		  unsigned int len)
   7984 {
   7985 	struct sock_fprog_kern *fprog;
   7986 	struct sk_filter *filter;
   7987 	int ret = 0;
   7988 
   7989 	lock_sock(sk);
   7990 	filter = rcu_dereference_protected(sk->sk_filter,
   7991 					   lockdep_sock_is_held(sk));
   7992 	if (!filter)
   7993 		goto out;
   7994 
    7995 	/* We're copying the filter that was originally attached, so no
    7996 	 * conversion/decoding is needed anymore. eBPF programs that
    7997 	 * have no original program cannot be dumped through this.
   7998 	 */
   7999 	ret = -EACCES;
   8000 	fprog = filter->prog->orig_prog;
   8001 	if (!fprog)
   8002 		goto out;
   8003 
   8004 	ret = fprog->len;
   8005 	if (!len)
    8006 		/* User space only asks for the number of filter blocks. */
   8007 		goto out;
   8008 
   8009 	ret = -EINVAL;
   8010 	if (len < fprog->len)
   8011 		goto out;
   8012 
   8013 	ret = -EFAULT;
   8014 	if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
   8015 		goto out;
   8016 
    8017 	/* Instead of bytes, the API expects the number of filter
    8018 	 * blocks to be returned.
   8019 	 */
   8020 	ret = fprog->len;
   8021 out:
   8022 	release_sock(sk);
   8023 	return ret;
   8024 }
   8025 
   8026 #ifdef CONFIG_INET
   8027 struct sk_reuseport_kern {
   8028 	struct sk_buff *skb;
   8029 	struct sock *sk;
   8030 	struct sock *selected_sk;
   8031 	void *data_end;
   8032 	u32 hash;
   8033 	u32 reuseport_id;
   8034 	bool bind_inany;
   8035 };
   8036 
   8037 static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
   8038 				    struct sock_reuseport *reuse,
   8039 				    struct sock *sk, struct sk_buff *skb,
   8040 				    u32 hash)
   8041 {
   8042 	reuse_kern->skb = skb;
   8043 	reuse_kern->sk = sk;
   8044 	reuse_kern->selected_sk = NULL;
   8045 	reuse_kern->data_end = skb->data + skb_headlen(skb);
   8046 	reuse_kern->hash = hash;
   8047 	reuse_kern->reuseport_id = reuse->reuseport_id;
   8048 	reuse_kern->bind_inany = reuse->bind_inany;
   8049 }
   8050 
   8051 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
   8052 				  struct bpf_prog *prog, struct sk_buff *skb,
   8053 				  u32 hash)
   8054 {
   8055 	struct sk_reuseport_kern reuse_kern;
   8056 	enum sk_action action;
   8057 
   8058 	bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
   8059 	action = BPF_PROG_RUN(prog, &reuse_kern);
   8060 
   8061 	if (action == SK_PASS)
   8062 		return reuse_kern.selected_sk;
   8063 	else
   8064 		return ERR_PTR(-ECONNREFUSED);
   8065 }
   8066 
   8067 BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
   8068 	   struct bpf_map *, map, void *, key, u32, flags)
   8069 {
   8070 	struct sock_reuseport *reuse;
   8071 	struct sock *selected_sk;
   8072 
   8073 	selected_sk = map->ops->map_lookup_elem(map, key);
   8074 	if (!selected_sk)
   8075 		return -ENOENT;
   8076 
   8077 	reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
   8078 	if (!reuse)
   8079 		/* selected_sk is unhashed (e.g. by close()) after the
    8080 		 * above map_lookup_elem().  Treat selected_sk as if it has
    8081 		 * already been removed from the map.
   8082 		 */
   8083 		return -ENOENT;
   8084 
   8085 	if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
   8086 		struct sock *sk;
   8087 
   8088 		if (unlikely(!reuse_kern->reuseport_id))
   8089 			/* There is a small race between adding the
   8090 			 * sk to the map and setting the
   8091 			 * reuse_kern->reuseport_id.
   8092 			 * Treat it as if the sk has not been added
   8093 			 * to the bpf map yet.
   8094 			 */
   8095 			return -ENOENT;
   8096 
   8097 		sk = reuse_kern->sk;
   8098 		if (sk->sk_protocol != selected_sk->sk_protocol)
   8099 			return -EPROTOTYPE;
   8100 		else if (sk->sk_family != selected_sk->sk_family)
   8101 			return -EAFNOSUPPORT;
   8102 
   8103 		/* Catch all. Likely bound to a different sockaddr. */
   8104 		return -EBADFD;
   8105 	}
   8106 
   8107 	reuse_kern->selected_sk = selected_sk;
   8108 
   8109 	return 0;
   8110 }
   8111 
   8112 static const struct bpf_func_proto sk_select_reuseport_proto = {
   8113 	.func           = sk_select_reuseport,
   8114 	.gpl_only       = false,
   8115 	.ret_type       = RET_INTEGER,
   8116 	.arg1_type	= ARG_PTR_TO_CTX,
   8117 	.arg2_type      = ARG_CONST_MAP_PTR,
   8118 	.arg3_type      = ARG_PTR_TO_MAP_KEY,
   8119 	.arg4_type	= ARG_ANYTHING,
   8120 };
   8121 
   8122 BPF_CALL_4(sk_reuseport_load_bytes,
   8123 	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
   8124 	   void *, to, u32, len)
   8125 {
   8126 	return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
   8127 }
   8128 
   8129 static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
   8130 	.func		= sk_reuseport_load_bytes,
   8131 	.gpl_only	= false,
   8132 	.ret_type	= RET_INTEGER,
   8133 	.arg1_type	= ARG_PTR_TO_CTX,
   8134 	.arg2_type	= ARG_ANYTHING,
   8135 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
   8136 	.arg4_type	= ARG_CONST_SIZE,
   8137 };
   8138 
   8139 BPF_CALL_5(sk_reuseport_load_bytes_relative,
   8140 	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
   8141 	   void *, to, u32, len, u32, start_header)
   8142 {
   8143 	return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
   8144 					       len, start_header);
   8145 }
   8146 
   8147 static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
   8148 	.func		= sk_reuseport_load_bytes_relative,
   8149 	.gpl_only	= false,
   8150 	.ret_type	= RET_INTEGER,
   8151 	.arg1_type	= ARG_PTR_TO_CTX,
   8152 	.arg2_type	= ARG_ANYTHING,
   8153 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
   8154 	.arg4_type	= ARG_CONST_SIZE,
   8155 	.arg5_type	= ARG_ANYTHING,
   8156 };
   8157 
   8158 static const struct bpf_func_proto *
   8159 sk_reuseport_func_proto(enum bpf_func_id func_id,
   8160 			const struct bpf_prog *prog)
   8161 {
   8162 	switch (func_id) {
   8163 	case BPF_FUNC_sk_select_reuseport:
   8164 		return &sk_select_reuseport_proto;
   8165 	case BPF_FUNC_skb_load_bytes:
   8166 		return &sk_reuseport_load_bytes_proto;
   8167 	case BPF_FUNC_skb_load_bytes_relative:
   8168 		return &sk_reuseport_load_bytes_relative_proto;
   8169 	default:
   8170 		return bpf_base_func_proto(func_id);
   8171 	}
   8172 }
   8173 
   8174 static bool
   8175 sk_reuseport_is_valid_access(int off, int size,
   8176 			     enum bpf_access_type type,
   8177 			     const struct bpf_prog *prog,
   8178 			     struct bpf_insn_access_aux *info)
   8179 {
   8180 	const u32 size_default = sizeof(__u32);
   8181 
   8182 	if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
   8183 	    off % size || type != BPF_READ)
   8184 		return false;
   8185 
   8186 	switch (off) {
   8187 	case offsetof(struct sk_reuseport_md, data):
   8188 		info->reg_type = PTR_TO_PACKET;
   8189 		return size == sizeof(__u64);
   8190 
   8191 	case offsetof(struct sk_reuseport_md, data_end):
   8192 		info->reg_type = PTR_TO_PACKET_END;
   8193 		return size == sizeof(__u64);
   8194 
   8195 	case offsetof(struct sk_reuseport_md, hash):
   8196 		return size == size_default;
   8197 
   8198 	/* Fields that allow narrowing */
   8199 	case offsetof(struct sk_reuseport_md, eth_protocol):
   8200 		if (size < FIELD_SIZEOF(struct sk_buff, protocol))
   8201 			return false;
   8202 		/* fall through */
   8203 	case offsetof(struct sk_reuseport_md, ip_protocol):
   8204 	case offsetof(struct sk_reuseport_md, bind_inany):
   8205 	case offsetof(struct sk_reuseport_md, len):
   8206 		bpf_ctx_record_field_size(info, size_default);
   8207 		return bpf_ctx_narrow_access_ok(off, size, size_default);
   8208 
   8209 	default:
   8210 		return false;
   8211 	}
   8212 }
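
        /* Illustration (added comment): under the rules above the verifier
         * accepts, for example,
         *
         *	r2 = *(u8 *)(r1 + offsetof(struct sk_reuseport_md, ip_protocol))
         *	r3 = *(u32 *)(r1 + offsetof(struct sk_reuseport_md, hash))
         *	r4 = *(u64 *)(r1 + offsetof(struct sk_reuseport_md, data))
         *
         * (r1 being the context register), but rejects a 1-byte read of
         * eth_protocol (narrower than skb->protocol), a non-4-byte read of
         * hash, and any write to the context.
         */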
   8213 
   8214 #define SK_REUSEPORT_LOAD_FIELD(F) ({					\
   8215 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
   8216 			      si->dst_reg, si->src_reg,			\
   8217 			      bpf_target_off(struct sk_reuseport_kern, F, \
   8218 					     FIELD_SIZEOF(struct sk_reuseport_kern, F), \
   8219 					     target_size));		\
   8220 	})
   8221 
   8222 #define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD)				\
   8223 	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
   8224 				    struct sk_buff,			\
   8225 				    skb,				\
   8226 				    SKB_FIELD)
   8227 
   8228 #define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
   8229 	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern,	\
   8230 					     struct sock,		\
   8231 					     sk,			\
   8232 					     SK_FIELD, BPF_SIZE, EXTRA_OFF)
   8233 
   8234 static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
   8235 					   const struct bpf_insn *si,
   8236 					   struct bpf_insn *insn_buf,
   8237 					   struct bpf_prog *prog,
   8238 					   u32 *target_size)
   8239 {
   8240 	struct bpf_insn *insn = insn_buf;
   8241 
   8242 	switch (si->off) {
   8243 	case offsetof(struct sk_reuseport_md, data):
   8244 		SK_REUSEPORT_LOAD_SKB_FIELD(data);
   8245 		break;
   8246 
   8247 	case offsetof(struct sk_reuseport_md, len):
   8248 		SK_REUSEPORT_LOAD_SKB_FIELD(len);
   8249 		break;
   8250 
   8251 	case offsetof(struct sk_reuseport_md, eth_protocol):
   8252 		SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
   8253 		break;
   8254 
   8255 	case offsetof(struct sk_reuseport_md, ip_protocol):
   8256 		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
   8257 		SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
   8258 						    BPF_W, 0);
   8259 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
   8260 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
   8261 					SK_FL_PROTO_SHIFT);
   8262 		/* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
   8263 		 * aware.  No further narrowing or masking is needed.
   8264 		 */
   8265 		*target_size = 1;
   8266 		break;
   8267 
   8268 	case offsetof(struct sk_reuseport_md, data_end):
   8269 		SK_REUSEPORT_LOAD_FIELD(data_end);
   8270 		break;
   8271 
   8272 	case offsetof(struct sk_reuseport_md, hash):
   8273 		SK_REUSEPORT_LOAD_FIELD(hash);
   8274 		break;
   8275 
   8276 	case offsetof(struct sk_reuseport_md, bind_inany):
   8277 		SK_REUSEPORT_LOAD_FIELD(bind_inany);
   8278 		break;
   8279 	}
   8280 
   8281 	return insn - insn_buf;
   8282 }
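
        /* Illustrative sketch (added comment, register numbers assumed): the
         * conversion above rewrites a UAPI context access such as
         *
         *	r0 = *(u32 *)(r1 + offsetof(struct sk_reuseport_md, hash))
         *
         * into a load from the kernel-side context filled in by
         * bpf_init_reuseport_kern(), roughly
         *
         *	r0 = *(u32 *)(r1 + offsetof(struct sk_reuseport_kern, hash))
         *
         * so that struct sk_reuseport_md never has to exist in memory.
         */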
   8283 
   8284 const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
   8285 	.get_func_proto		= sk_reuseport_func_proto,
   8286 	.is_valid_access	= sk_reuseport_is_valid_access,
   8287 	.convert_ctx_access	= sk_reuseport_convert_ctx_access,
   8288 };
   8289 
   8290 const struct bpf_prog_ops sk_reuseport_prog_ops = {
   8291 };
   8292 #endif /* CONFIG_INET */