whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

verifier.c (233751B)


      1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
      2  * Copyright (c) 2016 Facebook
      3  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
      4  *
      5  * This program is free software; you can redistribute it and/or
      6  * modify it under the terms of version 2 of the GNU General Public
      7  * License as published by the Free Software Foundation.
      8  *
      9  * This program is distributed in the hope that it will be useful, but
     10  * WITHOUT ANY WARRANTY; without even the implied warranty of
     11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
     12  * General Public License for more details.
     13  */
     14 #include <uapi/linux/btf.h>
     15 #include <linux/kernel.h>
     16 #include <linux/types.h>
     17 #include <linux/slab.h>
     18 #include <linux/bpf.h>
     19 #include <linux/btf.h>
     20 #include <linux/bpf_verifier.h>
     21 #include <linux/filter.h>
     22 #include <net/netlink.h>
     23 #include <linux/file.h>
     24 #include <linux/vmalloc.h>
     25 #include <linux/stringify.h>
     26 #include <linux/bsearch.h>
     27 #include <linux/sort.h>
     28 #include <linux/perf_event.h>
     29 #include <linux/ctype.h>
     30 
     31 #include "disasm.h"
     32 
     33 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
     34 #define BPF_PROG_TYPE(_id, _name) \
     35 	[_id] = & _name ## _verifier_ops,
     36 #define BPF_MAP_TYPE(_id, _ops)
     37 #include <linux/bpf_types.h>
     38 #undef BPF_PROG_TYPE
     39 #undef BPF_MAP_TYPE
     40 };
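        /* A minimal sketch of how the table above is populated: each
         * BPF_PROG_TYPE(_id, _name) line in <linux/bpf_types.h> expands to an
         * initializer of the form
         *
         *	[_id] = & _name ## _verifier_ops,
         *
         * so, for example (assuming CONFIG_NET is enabled and the usual
         * sk_filter entry is present in bpf_types.h),
         *
         *	BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter)
         *
         * becomes
         *
         *	[BPF_PROG_TYPE_SOCKET_FILTER] = &sk_filter_verifier_ops,
         *
         * while BPF_MAP_TYPE() entries expand to nothing here.
         */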
     41 
     42 /* bpf_check() is a static code analyzer that walks eBPF program
     43  * instruction by instruction and updates register/stack state.
     44  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
     45  *
     46  * The first pass is depth-first-search to check that the program is a DAG.
     47  * It rejects the following programs:
     48  * - larger than BPF_MAXINSNS insns
     49  * - if loop is present (detected via back-edge)
     50  * - unreachable insns exist (shouldn't be a forest. program = one function)
     51  * - out of bounds or malformed jumps
     52  * The second pass is all possible path descent from the 1st insn.
     53  * Since it's analyzing all paths through the program, the length of the
     54  * analysis is limited to 128k insns (BPF_COMPLEXITY_LIMIT_INSNS), which may be
     55  * hit even when the total number of insns is less than 4K if there are too
     56  * many branches that change stack/regs. Number of 'branches to be analyzed' is limited to 1k.
     57  *
     58  * On entry to each instruction, each register has a type, and the instruction
     59  * changes the types of the registers depending on instruction semantics.
     60  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
     61  * copied to R1.
     62  *
     63  * All registers are 64-bit.
     64  * R0 - return register
     65  * R1-R5 argument passing registers
     66  * R6-R9 callee saved registers
     67  * R10 - frame pointer read-only
     68  *
     69  * At the start of BPF program the register R1 contains a pointer to bpf_context
     70  * and has type PTR_TO_CTX.
     71  *
     72  * Verifier tracks arithmetic operations on pointers in case:
     73  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
     74  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
     75  * 1st insn copies R10 (which has FRAME_PTR) type into R1
     76  * and 2nd arithmetic instruction is pattern matched to recognize
     77  * that it wants to construct a pointer to some element within stack.
     78  * So after 2nd insn, the register R1 has type PTR_TO_STACK
     79  * (and -20 constant is saved for further stack bounds checking).
     80  * Meaning that this reg is a pointer to stack plus known immediate constant.
     81  *
     82  * Most of the time the registers have SCALAR_VALUE type, which
     83  * means the register has some value, but it's not a valid pointer.
     84  * (like pointer plus pointer becomes SCALAR_VALUE type)
     85  *
     86  * When verifier sees load or store instructions the type of base register
     87  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
     88  * four pointer types recognized by check_mem_access() function.
     89  *
     90  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
     91  * and the range of [ptr, ptr + map's value_size) is accessible.
     92  *
     93  * registers used to pass values to function calls are checked against
     94  * function argument constraints.
     95  *
     96  * ARG_PTR_TO_MAP_KEY is one such argument constraint.
     97  * It means that the register type passed to this function must be
     98  * PTR_TO_STACK and it will be used inside the function as
     99  * 'pointer to map element key'
    100  *
    101  * For example the argument constraints for bpf_map_lookup_elem():
    102  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
    103  *   .arg1_type = ARG_CONST_MAP_PTR,
    104  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
    105  *
    106  * ret_type says that this function returns 'pointer to map elem value or null'.
    107  * The function expects the 1st argument to be a const pointer to 'struct bpf_map'
    108  * and the 2nd argument to be a pointer to the stack, which will be used inside
    109  * the helper function as a pointer to the map element key.
    110  *
    111  * On the kernel side the helper function looks like:
    112  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
    113  * {
    114  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
    115  *    void *key = (void *) (unsigned long) r2;
    116  *    void *value;
    117  *
    118  *    here kernel can access 'key' and 'map' pointers safely, knowing that
    119  *    [key, key + map->key_size) bytes are valid and were initialized on
    120  *    the stack of eBPF program.
    121  * }
    122  *
    123  * Corresponding eBPF program may look like:
    124  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
    125  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
    126  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
    127  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
    128  * here the verifier looks at the prototype of map_lookup_elem() and sees:
    129  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok.
    130  * Now the verifier knows that this map has a key of R1->map_ptr->key_size bytes.
    131  *
    132  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far.
    133  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
    134  * and were initialized prior to this call.
    135  * If it's ok, then verifier allows this BPF_CALL insn and looks at
    136  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
    137  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
    138  * returns either a pointer to a map value or NULL.
    139  *
    140  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
    141  * insn, the register holding that pointer in the true branch changes state to
    142  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
    143  * branch. See check_cond_jmp_op().
    144  *
    145  * After the call R0 is set to return type of the function and registers R1-R5
    146  * are set to NOT_INIT to indicate that they are no longer readable.
    147  *
    148  * The following reference types represent a potential reference to a kernel
    149  * resource which, after first being allocated, must be checked and freed by
    150  * the BPF program:
    151  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
    152  *
    153  * When the verifier sees a helper call return a reference type, it allocates a
    154  * pointer id for the reference and stores it in the current function state.
    155  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
    156  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
    157  * passes through a NULL-check conditional. For the branch wherein the state is
    158  * changed to CONST_IMM, the verifier releases the reference.
    159  *
    160  * For each helper function that allocates a reference, such as
    161  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
    162  * bpf_sk_release(). When a reference type passes into the release function,
    163  * the verifier also releases the reference. If any unchecked or unreleased
    164  * reference remains at the end of the program, the verifier rejects it.
    165  */
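        /* A rough, hedged sketch of the lookup + NULL-check pattern described
         * above, written with the insn macros from <linux/filter.h>. 'map_fd'
         * is a hypothetical file descriptor of a map whose key_size is 4 and
         * whose value_size is at least 8:
         *
         *	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),	// R2 type is FRAME_PTR
         *	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),	// R2 type is PTR_TO_STACK (fp-4)
         *	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),	// initialize the key on the stack
         *	BPF_LD_MAP_FD(BPF_REG_1, map_fd),	// R1 type is CONST_PTR_TO_MAP
         *	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
         *	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),	// NULL check on PTR_TO_MAP_VALUE_OR_NULL
         *	BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),	// fall-through: R0 is PTR_TO_MAP_VALUE here
         *	BPF_MOV64_IMM(BPF_REG_0, 0),
         *	BPF_EXIT_INSN(),
         */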
    166 
    167 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
    168 struct bpf_verifier_stack_elem {
    169 	/* verifier state is 'st'
    170 	 * before processing instruction 'insn_idx'
    171 	 * and after processing instruction 'prev_insn_idx'
    172 	 */
    173 	struct bpf_verifier_state st;
    174 	int insn_idx;
    175 	int prev_insn_idx;
    176 	struct bpf_verifier_stack_elem *next;
    177 };
    178 
    179 #define BPF_COMPLEXITY_LIMIT_INSNS	131072
    180 #define BPF_COMPLEXITY_LIMIT_STACK	1024
    181 #define BPF_COMPLEXITY_LIMIT_STATES	64
    182 
    183 #define BPF_MAP_PTR_UNPRIV	1UL
    184 #define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
    185 					  POISON_POINTER_DELTA))
    186 #define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
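        /* aux->map_state packs a struct bpf_map pointer together with a
         * "seen by an unprivileged program" flag in bit 0; since map pointers
         * are at least word aligned, that bit is otherwise always zero.
         * Roughly:
         *
         *	map_state = (unsigned long)map | (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
         *	map       = BPF_MAP_PTR(map_state);		// mask the flag back off
         *	unpriv    = map_state & BPF_MAP_PTR_UNPRIV;
         */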
    187 
    188 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
    189 {
    190 	return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
    191 }
    192 
    193 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
    194 {
    195 	return aux->map_state & BPF_MAP_PTR_UNPRIV;
    196 }
    197 
    198 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
    199 			      const struct bpf_map *map, bool unpriv)
    200 {
    201 	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
    202 	unpriv |= bpf_map_ptr_unpriv(aux);
    203 	aux->map_state = (unsigned long)map |
    204 			 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
    205 }
    206 
    207 struct bpf_call_arg_meta {
    208 	struct bpf_map *map_ptr;
    209 	bool raw_mode;
    210 	bool pkt_access;
    211 	int regno;
    212 	int access_size;
    213 	s64 msize_smax_value;
    214 	u64 msize_umax_value;
    215 	int ref_obj_id;
    216 	int func_id;
    217 };
    218 
    219 static DEFINE_MUTEX(bpf_verifier_lock);
    220 
    221 static const struct bpf_line_info *
    222 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
    223 {
    224 	const struct bpf_line_info *linfo;
    225 	const struct bpf_prog *prog;
    226 	u32 i, nr_linfo;
    227 
    228 	prog = env->prog;
    229 	nr_linfo = prog->aux->nr_linfo;
    230 
    231 	if (!nr_linfo || insn_off >= prog->len)
    232 		return NULL;
    233 
    234 	linfo = prog->aux->linfo;
    235 	for (i = 1; i < nr_linfo; i++)
    236 		if (insn_off < linfo[i].insn_off)
    237 			break;
    238 
    239 	return &linfo[i - 1];
    240 }
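        /* For instance, with line infos at insn_off = {0, 5, 12}, a query for
         * insn_off 7 walks until linfo[2].insn_off (12) exceeds 7 and returns
         * the entry at insn_off 5, i.e. the last line info at or before the
         * requested instruction.
         */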
    241 
    242 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
    243 		       va_list args)
    244 {
    245 	unsigned int n;
    246 
    247 	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
    248 
    249 	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
    250 		  "verifier log line truncated - local buffer too short\n");
    251 
    252 	n = min(log->len_total - log->len_used - 1, n);
    253 	log->kbuf[n] = '\0';
    254 
    255 	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
    256 		log->len_used += n;
    257 	else
    258 		log->ubuf = NULL;
    259 }
    260 
    261 /* log_level controls verbosity level of eBPF verifier.
    262  * bpf_verifier_log_write() is used to dump the verification trace to the log,
    263  * so the user can figure out what's wrong with the program
    264  */
    265 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
    266 					   const char *fmt, ...)
    267 {
    268 	va_list args;
    269 
    270 	if (!bpf_verifier_log_needed(&env->log))
    271 		return;
    272 
    273 	va_start(args, fmt);
    274 	bpf_verifier_vlog(&env->log, fmt, args);
    275 	va_end(args);
    276 }
    277 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
    278 
    279 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
    280 {
    281 	struct bpf_verifier_env *env = private_data;
    282 	va_list args;
    283 
    284 	if (!bpf_verifier_log_needed(&env->log))
    285 		return;
    286 
    287 	va_start(args, fmt);
    288 	bpf_verifier_vlog(&env->log, fmt, args);
    289 	va_end(args);
    290 }
    291 
    292 static const char *ltrim(const char *s)
    293 {
    294 	while (isspace(*s))
    295 		s++;
    296 
    297 	return s;
    298 }
    299 
    300 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
    301 					 u32 insn_off,
    302 					 const char *prefix_fmt, ...)
    303 {
    304 	const struct bpf_line_info *linfo;
    305 
    306 	if (!bpf_verifier_log_needed(&env->log))
    307 		return;
    308 
    309 	linfo = find_linfo(env, insn_off);
    310 	if (!linfo || linfo == env->prev_linfo)
    311 		return;
    312 
    313 	if (prefix_fmt) {
    314 		va_list args;
    315 
    316 		va_start(args, prefix_fmt);
    317 		bpf_verifier_vlog(&env->log, prefix_fmt, args);
    318 		va_end(args);
    319 	}
    320 
    321 	verbose(env, "%s\n",
    322 		ltrim(btf_name_by_offset(env->prog->aux->btf,
    323 					 linfo->line_off)));
    324 
    325 	env->prev_linfo = linfo;
    326 }
    327 
    328 static bool type_is_pkt_pointer(enum bpf_reg_type type)
    329 {
    330 	return type == PTR_TO_PACKET ||
    331 	       type == PTR_TO_PACKET_META;
    332 }
    333 
    334 static bool type_is_sk_pointer(enum bpf_reg_type type)
    335 {
    336 	return type == PTR_TO_SOCKET ||
    337 		type == PTR_TO_SOCK_COMMON ||
    338 		type == PTR_TO_TCP_SOCK;
    339 }
    340 
    341 static bool reg_type_may_be_null(enum bpf_reg_type type)
    342 {
    343 	return type == PTR_TO_MAP_VALUE_OR_NULL ||
    344 	       type == PTR_TO_SOCKET_OR_NULL ||
    345 	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
    346 	       type == PTR_TO_TCP_SOCK_OR_NULL;
    347 }
    348 
    349 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
    350 {
    351 	return reg->type == PTR_TO_MAP_VALUE &&
    352 		map_value_has_spin_lock(reg->map_ptr);
    353 }
    354 
    355 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
    356 {
    357 	return type == PTR_TO_SOCKET ||
    358 		type == PTR_TO_SOCKET_OR_NULL ||
    359 		type == PTR_TO_TCP_SOCK ||
    360 		type == PTR_TO_TCP_SOCK_OR_NULL;
    361 }
    362 
    363 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
    364 {
    365 	return type == ARG_PTR_TO_SOCK_COMMON;
    366 }
    367 
    368 /* Determine whether the function releases some resources allocated by another
    369  * function call. The first reference type argument will be assumed to be
    370  * released by release_reference().
    371  */
    372 static bool is_release_function(enum bpf_func_id func_id)
    373 {
    374 	return func_id == BPF_FUNC_sk_release;
    375 }
    376 
    377 static bool is_acquire_function(enum bpf_func_id func_id)
    378 {
    379 	return func_id == BPF_FUNC_sk_lookup_tcp ||
    380 		func_id == BPF_FUNC_sk_lookup_udp;
    381 }
    382 
    383 static bool is_ptr_cast_function(enum bpf_func_id func_id)
    384 {
    385 	return func_id == BPF_FUNC_tcp_sock ||
    386 		func_id == BPF_FUNC_sk_fullsock;
    387 }
    388 
    389 /* string representation of 'enum bpf_reg_type' */
    390 static const char * const reg_type_str[] = {
    391 	[NOT_INIT]		= "?",
    392 	[SCALAR_VALUE]		= "inv",
    393 	[PTR_TO_CTX]		= "ctx",
    394 	[CONST_PTR_TO_MAP]	= "map_ptr",
    395 	[PTR_TO_MAP_VALUE]	= "map_value",
    396 	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
    397 	[PTR_TO_STACK]		= "fp",
    398 	[PTR_TO_PACKET]		= "pkt",
    399 	[PTR_TO_PACKET_META]	= "pkt_meta",
    400 	[PTR_TO_PACKET_END]	= "pkt_end",
    401 	[PTR_TO_FLOW_KEYS]	= "flow_keys",
    402 	[PTR_TO_SOCKET]		= "sock",
    403 	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
    404 	[PTR_TO_SOCK_COMMON]	= "sock_common",
    405 	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
    406 	[PTR_TO_TCP_SOCK]	= "tcp_sock",
    407 	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
    408 };
    409 
    410 static char slot_type_char[] = {
    411 	[STACK_INVALID]	= '?',
    412 	[STACK_SPILL]	= 'r',
    413 	[STACK_MISC]	= 'm',
    414 	[STACK_ZERO]	= '0',
    415 };
    416 
    417 static void print_liveness(struct bpf_verifier_env *env,
    418 			   enum bpf_reg_liveness live)
    419 {
    420 	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
    421 	    verbose(env, "_");
    422 	if (live & REG_LIVE_READ)
    423 		verbose(env, "r");
    424 	if (live & REG_LIVE_WRITTEN)
    425 		verbose(env, "w");
    426 	if (live & REG_LIVE_DONE)
    427 		verbose(env, "D");
    428 }
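        /* The liveness marks show up as short suffixes in the state dump:
         * e.g. "R0_w" for a register that has been written in this state,
         * "R6_rw" for one that was both read and written, and "_D" once the
         * state has been fully explored (REG_LIVE_DONE).
         */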
    429 
    430 static struct bpf_func_state *func(struct bpf_verifier_env *env,
    431 				   const struct bpf_reg_state *reg)
    432 {
    433 	struct bpf_verifier_state *cur = env->cur_state;
    434 
    435 	return cur->frame[reg->frameno];
    436 }
    437 
    438 static void print_verifier_state(struct bpf_verifier_env *env,
    439 				 const struct bpf_func_state *state)
    440 {
    441 	const struct bpf_reg_state *reg;
    442 	enum bpf_reg_type t;
    443 	int i;
    444 
    445 	if (state->frameno)
    446 		verbose(env, " frame%d:", state->frameno);
    447 	for (i = 0; i < MAX_BPF_REG; i++) {
    448 		reg = &state->regs[i];
    449 		t = reg->type;
    450 		if (t == NOT_INIT)
    451 			continue;
    452 		verbose(env, " R%d", i);
    453 		print_liveness(env, reg->live);
    454 		verbose(env, "=%s", reg_type_str[t]);
    455 		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
    456 		    tnum_is_const(reg->var_off)) {
    457 			/* reg->off should be 0 for SCALAR_VALUE */
    458 			verbose(env, "%lld", reg->var_off.value + reg->off);
    459 			if (t == PTR_TO_STACK)
    460 				verbose(env, ",call_%d", func(env, reg)->callsite);
    461 		} else {
    462 			verbose(env, "(id=%d", reg->id);
    463 			if (reg_type_may_be_refcounted_or_null(t))
    464 				verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
    465 			if (t != SCALAR_VALUE)
    466 				verbose(env, ",off=%d", reg->off);
    467 			if (type_is_pkt_pointer(t))
    468 				verbose(env, ",r=%d", reg->range);
    469 			else if (t == CONST_PTR_TO_MAP ||
    470 				 t == PTR_TO_MAP_VALUE ||
    471 				 t == PTR_TO_MAP_VALUE_OR_NULL)
    472 				verbose(env, ",ks=%d,vs=%d",
    473 					reg->map_ptr->key_size,
    474 					reg->map_ptr->value_size);
    475 			if (tnum_is_const(reg->var_off)) {
    476 				/* Typically an immediate SCALAR_VALUE, but
    477 				 * could be a pointer whose offset is too big
    478 				 * for reg->off
    479 				 */
    480 				verbose(env, ",imm=%llx", reg->var_off.value);
    481 			} else {
    482 				if (reg->smin_value != reg->umin_value &&
    483 				    reg->smin_value != S64_MIN)
    484 					verbose(env, ",smin_value=%lld",
    485 						(long long)reg->smin_value);
    486 				if (reg->smax_value != reg->umax_value &&
    487 				    reg->smax_value != S64_MAX)
    488 					verbose(env, ",smax_value=%lld",
    489 						(long long)reg->smax_value);
    490 				if (reg->umin_value != 0)
    491 					verbose(env, ",umin_value=%llu",
    492 						(unsigned long long)reg->umin_value);
    493 				if (reg->umax_value != U64_MAX)
    494 					verbose(env, ",umax_value=%llu",
    495 						(unsigned long long)reg->umax_value);
    496 				if (!tnum_is_unknown(reg->var_off)) {
    497 					char tn_buf[48];
    498 
    499 					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
    500 					verbose(env, ",var_off=%s", tn_buf);
    501 				}
    502 			}
    503 			verbose(env, ")");
    504 		}
    505 	}
    506 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
    507 		char types_buf[BPF_REG_SIZE + 1];
    508 		bool valid = false;
    509 		int j;
    510 
    511 		for (j = 0; j < BPF_REG_SIZE; j++) {
    512 			if (state->stack[i].slot_type[j] != STACK_INVALID)
    513 				valid = true;
    514 			types_buf[j] = slot_type_char[
    515 					state->stack[i].slot_type[j]];
    516 		}
    517 		types_buf[BPF_REG_SIZE] = 0;
    518 		if (!valid)
    519 			continue;
    520 		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
    521 		print_liveness(env, state->stack[i].spilled_ptr.live);
    522 		if (state->stack[i].slot_type[0] == STACK_SPILL)
    523 			verbose(env, "=%s",
    524 				reg_type_str[state->stack[i].spilled_ptr.type]);
    525 		else
    526 			verbose(env, "=%s", types_buf);
    527 	}
    528 	if (state->acquired_refs && state->refs[0].id) {
    529 		verbose(env, " refs=%d", state->refs[0].id);
    530 		for (i = 1; i < state->acquired_refs; i++)
    531 			if (state->refs[i].id)
    532 				verbose(env, ",%d", state->refs[i].id);
    533 	}
    534 	verbose(env, "\n");
    535 }
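        /* A typical line produced by this function at the start of a program
         * looks something like (exact fields vary by kernel version):
         *
         *	 R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
         *
         * i.e. R1 is the PTR_TO_CTX argument and R10 is the read-only frame
         * pointer of the main function (callsite -1).
         */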
    536 
    537 #define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)				\
    538 static int copy_##NAME##_state(struct bpf_func_state *dst,		\
    539 			       const struct bpf_func_state *src)	\
    540 {									\
    541 	if (!src->FIELD)						\
    542 		return 0;						\
    543 	if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {			\
    544 		/* internal bug, make state invalid to reject the program */ \
    545 		memset(dst, 0, sizeof(*dst));				\
    546 		return -EFAULT;						\
    547 	}								\
    548 	memcpy(dst->FIELD, src->FIELD,					\
    549 	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
    550 	return 0;							\
    551 }
    552 /* copy_reference_state() */
    553 COPY_STATE_FN(reference, acquired_refs, refs, 1)
    554 /* copy_stack_state() */
    555 COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
    556 #undef COPY_STATE_FN
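        /* As a reference for reading the macro above, copy_reference_state()
         * expands to (roughly):
         *
         *	static int copy_reference_state(struct bpf_func_state *dst,
         *					const struct bpf_func_state *src)
         *	{
         *		if (!src->refs)
         *			return 0;
         *		if (WARN_ON_ONCE(dst->acquired_refs < src->acquired_refs)) {
         *			memset(dst, 0, sizeof(*dst));
         *			return -EFAULT;
         *		}
         *		memcpy(dst->refs, src->refs,
         *		       sizeof(*src->refs) * (src->acquired_refs / 1));
         *		return 0;
         *	}
         */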
    557 
    558 #define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)			\
    559 static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
    560 				  bool copy_old)			\
    561 {									\
    562 	u32 old_size = state->COUNT;					\
    563 	struct bpf_##NAME##_state *new_##FIELD;				\
    564 	int slot = size / SIZE;						\
    565 									\
    566 	if (size <= old_size || !size) {				\
    567 		if (copy_old)						\
    568 			return 0;					\
    569 		state->COUNT = slot * SIZE;				\
    570 		if (!size && old_size) {				\
    571 			kfree(state->FIELD);				\
    572 			state->FIELD = NULL;				\
    573 		}							\
    574 		return 0;						\
    575 	}								\
    576 	new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
    577 				    GFP_KERNEL);			\
    578 	if (!new_##FIELD)						\
    579 		return -ENOMEM;						\
    580 	if (copy_old) {							\
    581 		if (state->FIELD)					\
    582 			memcpy(new_##FIELD, state->FIELD,		\
    583 			       sizeof(*new_##FIELD) * (old_size / SIZE)); \
    584 		memset(new_##FIELD + old_size / SIZE, 0,		\
    585 		       sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
    586 	}								\
    587 	state->COUNT = slot * SIZE;					\
    588 	kfree(state->FIELD);						\
    589 	state->FIELD = new_##FIELD;					\
    590 	return 0;							\
    591 }
    592 /* realloc_reference_state() */
    593 REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
    594 /* realloc_stack_state() */
    595 REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
    596 #undef REALLOC_STATE_FN
    597 
    598 /* do_check() starts with a zero-sized stack in struct bpf_verifier_state to
    599  * make it consume a minimal amount of memory. A stack access from the program
    600  * makes check_stack_write() call into realloc_func_state() to grow the stack size.
    601  * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
    602  * which realloc_stack_state() copies over. It points to previous
    603  * bpf_verifier_state which is never reallocated.
    604  */
    605 static int realloc_func_state(struct bpf_func_state *state, int stack_size,
    606 			      int refs_size, bool copy_old)
    607 {
    608 	int err = realloc_reference_state(state, refs_size, copy_old);
    609 	if (err)
    610 		return err;
    611 	return realloc_stack_state(state, stack_size, copy_old);
    612 }
    613 
    614 /* Acquire a pointer id from the env and update the state->refs to include
    615  * this new pointer reference.
    616  * On success, returns a valid pointer id to associate with the register
    617  * On failure, returns a negative errno.
    618  */
    619 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
    620 {
    621 	struct bpf_func_state *state = cur_func(env);
    622 	int new_ofs = state->acquired_refs;
    623 	int id, err;
    624 
    625 	err = realloc_reference_state(state, state->acquired_refs + 1, true);
    626 	if (err)
    627 		return err;
    628 	id = ++env->id_gen;
    629 	state->refs[new_ofs].id = id;
    630 	state->refs[new_ofs].insn_idx = insn_idx;
    631 
    632 	return id;
    633 }
    634 
    635 /* release function corresponding to acquire_reference_state(). Idempotent. */
    636 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
    637 {
    638 	int i, last_idx;
    639 
    640 	last_idx = state->acquired_refs - 1;
    641 	for (i = 0; i < state->acquired_refs; i++) {
    642 		if (state->refs[i].id == ptr_id) {
    643 			if (last_idx && i != last_idx)
    644 				memcpy(&state->refs[i], &state->refs[last_idx],
    645 				       sizeof(*state->refs));
    646 			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
    647 			state->acquired_refs--;
    648 			return 0;
    649 		}
    650 	}
    651 	return -EINVAL;
    652 }
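        /* Example: with refs[] = {{id=3}, {id=7}, {id=9}} and ptr_id == 3,
         * the last entry is copied over slot 0 and then cleared, leaving
         * refs[] = {{id=9}, {id=7}} with acquired_refs == 2. The order of the
         * remaining references does not matter, only membership does.
         */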
    653 
    654 static int transfer_reference_state(struct bpf_func_state *dst,
    655 				    struct bpf_func_state *src)
    656 {
    657 	int err = realloc_reference_state(dst, src->acquired_refs, false);
    658 	if (err)
    659 		return err;
    660 	err = copy_reference_state(dst, src);
    661 	if (err)
    662 		return err;
    663 	return 0;
    664 }
    665 
    666 static void free_func_state(struct bpf_func_state *state)
    667 {
    668 	if (!state)
    669 		return;
    670 	kfree(state->refs);
    671 	kfree(state->stack);
    672 	kfree(state);
    673 }
    674 
    675 static void free_verifier_state(struct bpf_verifier_state *state,
    676 				bool free_self)
    677 {
    678 	int i;
    679 
    680 	for (i = 0; i <= state->curframe; i++) {
    681 		free_func_state(state->frame[i]);
    682 		state->frame[i] = NULL;
    683 	}
    684 	if (free_self)
    685 		kfree(state);
    686 }
    687 
    688 /* copy verifier state from src to dst growing dst stack space
    689  * when necessary to accommodate larger src stack
    690  */
    691 static int copy_func_state(struct bpf_func_state *dst,
    692 			   const struct bpf_func_state *src)
    693 {
    694 	int err;
    695 
    696 	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
    697 				 false);
    698 	if (err)
    699 		return err;
    700 	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
    701 	err = copy_reference_state(dst, src);
    702 	if (err)
    703 		return err;
    704 	return copy_stack_state(dst, src);
    705 }
    706 
    707 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
    708 			       const struct bpf_verifier_state *src)
    709 {
    710 	struct bpf_func_state *dst;
    711 	int i, err;
    712 
    713 	/* if dst has more stack frames than src, free them */
    714 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
    715 		free_func_state(dst_state->frame[i]);
    716 		dst_state->frame[i] = NULL;
    717 	}
    718 	dst_state->speculative = src->speculative;
    719 	dst_state->curframe = src->curframe;
    720 	dst_state->active_spin_lock = src->active_spin_lock;
    721 	for (i = 0; i <= src->curframe; i++) {
    722 		dst = dst_state->frame[i];
    723 		if (!dst) {
    724 			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
    725 			if (!dst)
    726 				return -ENOMEM;
    727 			dst_state->frame[i] = dst;
    728 		}
    729 		err = copy_func_state(dst, src->frame[i]);
    730 		if (err)
    731 			return err;
    732 	}
    733 	return 0;
    734 }
    735 
    736 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
    737 		     int *insn_idx)
    738 {
    739 	struct bpf_verifier_state *cur = env->cur_state;
    740 	struct bpf_verifier_stack_elem *elem, *head = env->head;
    741 	int err;
    742 
    743 	if (env->head == NULL)
    744 		return -ENOENT;
    745 
    746 	if (cur) {
    747 		err = copy_verifier_state(cur, &head->st);
    748 		if (err)
    749 			return err;
    750 	}
    751 	if (insn_idx)
    752 		*insn_idx = head->insn_idx;
    753 	if (prev_insn_idx)
    754 		*prev_insn_idx = head->prev_insn_idx;
    755 	elem = head->next;
    756 	free_verifier_state(&head->st, false);
    757 	kfree(head);
    758 	env->head = elem;
    759 	env->stack_size--;
    760 	return 0;
    761 }
    762 
    763 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
    764 					     int insn_idx, int prev_insn_idx,
    765 					     bool speculative)
    766 {
    767 	struct bpf_verifier_state *cur = env->cur_state;
    768 	struct bpf_verifier_stack_elem *elem;
    769 	int err;
    770 
    771 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
    772 	if (!elem)
    773 		goto err;
    774 
    775 	elem->insn_idx = insn_idx;
    776 	elem->prev_insn_idx = prev_insn_idx;
    777 	elem->next = env->head;
    778 	env->head = elem;
    779 	env->stack_size++;
    780 	err = copy_verifier_state(&elem->st, cur);
    781 	if (err)
    782 		goto err;
    783 	elem->st.speculative |= speculative;
    784 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
    785 		verbose(env, "BPF program is too complex\n");
    786 		goto err;
    787 	}
    788 	return &elem->st;
    789 err:
    790 	free_verifier_state(env->cur_state, true);
    791 	env->cur_state = NULL;
    792 	/* pop all elements and return */
    793 	while (!pop_stack(env, NULL, NULL));
    794 	return NULL;
    795 }
    796 
    797 #define CALLER_SAVED_REGS 6
    798 static const int caller_saved[CALLER_SAVED_REGS] = {
    799 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
    800 };
    801 
    802 static void __mark_reg_not_init(struct bpf_reg_state *reg);
    803 
    804 /* Mark the unknown part of a register (variable offset or scalar value) as
    805  * known to have the value @imm.
    806  */
    807 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
    808 {
    809 	/* Clear id, off, and union(map_ptr, range) */
    810 	memset(((u8 *)reg) + sizeof(reg->type), 0,
    811 	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
    812 	reg->var_off = tnum_const(imm);
    813 	reg->smin_value = (s64)imm;
    814 	reg->smax_value = (s64)imm;
    815 	reg->umin_value = imm;
    816 	reg->umax_value = imm;
    817 }
    818 
    819 /* Mark the 'variable offset' part of a register as zero.  This should be
    820  * used only on registers holding a pointer type.
    821  */
    822 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
    823 {
    824 	__mark_reg_known(reg, 0);
    825 }
    826 
    827 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
    828 {
    829 	__mark_reg_known(reg, 0);
    830 	reg->type = SCALAR_VALUE;
    831 }
    832 
    833 static void mark_reg_known_zero(struct bpf_verifier_env *env,
    834 				struct bpf_reg_state *regs, u32 regno)
    835 {
    836 	if (WARN_ON(regno >= MAX_BPF_REG)) {
    837 		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
    838 		/* Something bad happened, let's kill all regs */
    839 		for (regno = 0; regno < MAX_BPF_REG; regno++)
    840 			__mark_reg_not_init(regs + regno);
    841 		return;
    842 	}
    843 	__mark_reg_known_zero(regs + regno);
    844 }
    845 
    846 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
    847 {
    848 	return type_is_pkt_pointer(reg->type);
    849 }
    850 
    851 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
    852 {
    853 	return reg_is_pkt_pointer(reg) ||
    854 	       reg->type == PTR_TO_PACKET_END;
    855 }
    856 
    857 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
    858 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
    859 				    enum bpf_reg_type which)
    860 {
    861 	/* The register can already have a range from prior markings.
    862 	 * This is fine as long as it hasn't been advanced from its
    863 	 * origin.
    864 	 */
    865 	return reg->type == which &&
    866 	       reg->id == 0 &&
    867 	       reg->off == 0 &&
    868 	       tnum_equals_const(reg->var_off, 0);
    869 }
    870 
    871 /* Attempts to improve min/max values based on var_off information */
    872 static void __update_reg_bounds(struct bpf_reg_state *reg)
    873 {
    874 	/* min signed is max(sign bit) | min(other bits) */
    875 	reg->smin_value = max_t(s64, reg->smin_value,
    876 				reg->var_off.value | (reg->var_off.mask & S64_MIN));
    877 	/* max signed is min(sign bit) | max(other bits) */
    878 	reg->smax_value = min_t(s64, reg->smax_value,
    879 				reg->var_off.value | (reg->var_off.mask & S64_MAX));
    880 	reg->umin_value = max(reg->umin_value, reg->var_off.value);
    881 	reg->umax_value = min(reg->umax_value,
    882 			      reg->var_off.value | reg->var_off.mask);
    883 }
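        /* Example: for var_off = {.value = 0x0, .mask = 0x3} (only the low two
         * bits are unknown), the register can only hold 0..3, so, starting from
         * unbounded min/max, the bounds collapse to umin_value = 0 and
         * umax_value = 3, and, because the sign bit is known to be clear,
         * smin_value = 0 and smax_value = 3 as well.
         */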
    884 
    885 /* Uses signed min/max values to inform unsigned, and vice-versa */
    886 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
    887 {
    888 	/* Learn sign from signed bounds.
    889 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
    890 	 * are the same, so combine.  This works even in the negative case, e.g.
    891 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
    892 	 */
    893 	if (reg->smin_value >= 0 || reg->smax_value < 0) {
    894 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
    895 							  reg->umin_value);
    896 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
    897 							  reg->umax_value);
    898 		return;
    899 	}
    900 	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
    901 	 * boundary, so we must be careful.
    902 	 */
    903 	if ((s64)reg->umax_value >= 0) {
    904 		/* Positive.  We can't learn anything from the smin, but smax
    905 		 * is positive, hence safe.
    906 		 */
    907 		reg->smin_value = reg->umin_value;
    908 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
    909 							  reg->umax_value);
    910 	} else if ((s64)reg->umin_value < 0) {
    911 		/* Negative.  We can't learn anything from the smax, but smin
    912 		 * is negative, hence safe.
    913 		 */
    914 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
    915 							  reg->umin_value);
    916 		reg->smax_value = reg->umax_value;
    917 	}
    918 }
    919 
    920 /* Attempts to improve var_off based on unsigned min/max information */
    921 static void __reg_bound_offset(struct bpf_reg_state *reg)
    922 {
    923 	reg->var_off = tnum_intersect(reg->var_off,
    924 				      tnum_range(reg->umin_value,
    925 						 reg->umax_value));
    926 }
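        /* Example: with umin_value = 16 and umax_value = 31, tnum_range()
         * yields {.value = 0x10, .mask = 0xf} (bit 4 known set, low four bits
         * unknown), and intersecting that with the existing var_off can only
         * make the known bits more precise, never less.
         */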
    927 
    928 /* Reset the min/max bounds of a register */
    929 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
    930 {
    931 	reg->smin_value = S64_MIN;
    932 	reg->smax_value = S64_MAX;
    933 	reg->umin_value = 0;
    934 	reg->umax_value = U64_MAX;
    935 }
    936 
    937 /* Mark a register as having a completely unknown (scalar) value. */
    938 static void __mark_reg_unknown(struct bpf_reg_state *reg)
    939 {
    940 	/*
    941 	 * Clear type, id, off, and union(map_ptr, range) and
    942 	 * padding between 'type' and union
    943 	 */
    944 	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
    945 	reg->type = SCALAR_VALUE;
    946 	reg->var_off = tnum_unknown;
    947 	reg->frameno = 0;
    948 	__mark_reg_unbounded(reg);
    949 }
    950 
    951 static void mark_reg_unknown(struct bpf_verifier_env *env,
    952 			     struct bpf_reg_state *regs, u32 regno)
    953 {
    954 	if (WARN_ON(regno >= MAX_BPF_REG)) {
    955 		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
    956 		/* Something bad happened, let's kill all regs except FP */
    957 		for (regno = 0; regno < BPF_REG_FP; regno++)
    958 			__mark_reg_not_init(regs + regno);
    959 		return;
    960 	}
    961 	__mark_reg_unknown(regs + regno);
    962 }
    963 
    964 static void __mark_reg_not_init(struct bpf_reg_state *reg)
    965 {
    966 	__mark_reg_unknown(reg);
    967 	reg->type = NOT_INIT;
    968 }
    969 
    970 static void mark_reg_not_init(struct bpf_verifier_env *env,
    971 			      struct bpf_reg_state *regs, u32 regno)
    972 {
    973 	if (WARN_ON(regno >= MAX_BPF_REG)) {
    974 		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
    975 		/* Something bad happened, let's kill all regs except FP */
    976 		for (regno = 0; regno < BPF_REG_FP; regno++)
    977 			__mark_reg_not_init(regs + regno);
    978 		return;
    979 	}
    980 	__mark_reg_not_init(regs + regno);
    981 }
    982 
    983 static void init_reg_state(struct bpf_verifier_env *env,
    984 			   struct bpf_func_state *state)
    985 {
    986 	struct bpf_reg_state *regs = state->regs;
    987 	int i;
    988 
    989 	for (i = 0; i < MAX_BPF_REG; i++) {
    990 		mark_reg_not_init(env, regs, i);
    991 		regs[i].live = REG_LIVE_NONE;
    992 		regs[i].parent = NULL;
    993 	}
    994 
    995 	/* frame pointer */
    996 	regs[BPF_REG_FP].type = PTR_TO_STACK;
    997 	mark_reg_known_zero(env, regs, BPF_REG_FP);
    998 	regs[BPF_REG_FP].frameno = state->frameno;
    999 
   1000 	/* 1st arg to a function */
   1001 	regs[BPF_REG_1].type = PTR_TO_CTX;
   1002 	mark_reg_known_zero(env, regs, BPF_REG_1);
   1003 }
   1004 
   1005 #define BPF_MAIN_FUNC (-1)
   1006 static void init_func_state(struct bpf_verifier_env *env,
   1007 			    struct bpf_func_state *state,
   1008 			    int callsite, int frameno, int subprogno)
   1009 {
   1010 	state->callsite = callsite;
   1011 	state->frameno = frameno;
   1012 	state->subprogno = subprogno;
   1013 	init_reg_state(env, state);
   1014 }
   1015 
   1016 enum reg_arg_type {
   1017 	SRC_OP,		/* register is used as source operand */
   1018 	DST_OP,		/* register is used as destination operand */
   1019 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
   1020 };
   1021 
   1022 static int cmp_subprogs(const void *a, const void *b)
   1023 {
   1024 	return ((struct bpf_subprog_info *)a)->start -
   1025 	       ((struct bpf_subprog_info *)b)->start;
   1026 }
   1027 
   1028 static int find_subprog(struct bpf_verifier_env *env, int off)
   1029 {
   1030 	struct bpf_subprog_info *p;
   1031 
   1032 	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
   1033 		    sizeof(env->subprog_info[0]), cmp_subprogs);
   1034 	if (!p)
   1035 		return -ENOENT;
   1036 	return p - env->subprog_info;
   1037 
   1038 }
   1039 
   1040 static int add_subprog(struct bpf_verifier_env *env, int off)
   1041 {
   1042 	int insn_cnt = env->prog->len;
   1043 	int ret;
   1044 
   1045 	if (off >= insn_cnt || off < 0) {
   1046 		verbose(env, "call to invalid destination\n");
   1047 		return -EINVAL;
   1048 	}
   1049 	ret = find_subprog(env, off);
   1050 	if (ret >= 0)
   1051 		return 0;
   1052 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
   1053 		verbose(env, "too many subprograms\n");
   1054 		return -E2BIG;
   1055 	}
   1056 	env->subprog_info[env->subprog_cnt++].start = off;
   1057 	sort(env->subprog_info, env->subprog_cnt,
   1058 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
   1059 	return 0;
   1060 }
   1061 
   1062 static int check_subprogs(struct bpf_verifier_env *env)
   1063 {
   1064 	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
   1065 	struct bpf_subprog_info *subprog = env->subprog_info;
   1066 	struct bpf_insn *insn = env->prog->insnsi;
   1067 	int insn_cnt = env->prog->len;
   1068 
   1069 	/* Add entry function. */
   1070 	ret = add_subprog(env, 0);
   1071 	if (ret < 0)
   1072 		return ret;
   1073 
   1074 	/* determine subprog starts. The end is one before the next starts */
   1075 	for (i = 0; i < insn_cnt; i++) {
   1076 		if (insn[i].code != (BPF_JMP | BPF_CALL))
   1077 			continue;
   1078 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
   1079 			continue;
   1080 		if (!env->allow_ptr_leaks) {
   1081 			verbose(env, "function calls to other bpf functions are allowed for root only\n");
   1082 			return -EPERM;
   1083 		}
   1084 		ret = add_subprog(env, i + insn[i].imm + 1);
   1085 		if (ret < 0)
   1086 			return ret;
   1087 	}
   1088 
   1089 	/* Add a fake 'exit' subprog which could simplify subprog iteration
   1090 	 * logic. 'subprog_cnt' should not be increased.
   1091 	 */
   1092 	subprog[env->subprog_cnt].start = insn_cnt;
   1093 
   1094 	if (env->log.level > 1)
   1095 		for (i = 0; i < env->subprog_cnt; i++)
   1096 			verbose(env, "func#%d @%d\n", i, subprog[i].start);
   1097 
   1098 	/* now check that all jumps are within the same subprog */
   1099 	subprog_start = subprog[cur_subprog].start;
   1100 	subprog_end = subprog[cur_subprog + 1].start;
   1101 	for (i = 0; i < insn_cnt; i++) {
   1102 		u8 code = insn[i].code;
   1103 
   1104 		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
   1105 			goto next;
   1106 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
   1107 			goto next;
   1108 		off = i + insn[i].off + 1;
   1109 		if (off < subprog_start || off >= subprog_end) {
   1110 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
   1111 			return -EINVAL;
   1112 		}
   1113 next:
   1114 		if (i == subprog_end - 1) {
   1115 			/* to avoid fall-through from one subprog into another
   1116 			 * the last insn of the subprog should be either exit
   1117 			 * or unconditional jump back
   1118 			 */
   1119 			if (code != (BPF_JMP | BPF_EXIT) &&
   1120 			    code != (BPF_JMP | BPF_JA)) {
   1121 				verbose(env, "last insn is not an exit or jmp\n");
   1122 				return -EINVAL;
   1123 			}
   1124 			subprog_start = subprog_end;
   1125 			cur_subprog++;
   1126 			if (cur_subprog < env->subprog_cnt)
   1127 				subprog_end = subprog[cur_subprog + 1].start;
   1128 		}
   1129 	}
   1130 	return 0;
   1131 }
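        /* Example of the layout this builds: for a program where insn 2 is a
         * BPF_CALL with src_reg == BPF_PSEUDO_CALL and imm == 2, the callee
         * starts at insn 2 + 2 + 1 = 5, so subprog_info holds starts
         * {0, 5, insn_cnt} (the last one being the fake 'exit' subprog), and
         * every jump inside insns 0..4 must stay within 0..4.
         */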
   1132 
   1133 /* Parentage chain of this register (or stack slot) should take care of all
   1134  * issues like callee-saved registers, stack slot allocation time, etc.
   1135  */
   1136 static int mark_reg_read(struct bpf_verifier_env *env,
   1137 			 const struct bpf_reg_state *state,
   1138 			 struct bpf_reg_state *parent)
   1139 {
   1140 	bool writes = parent == state->parent; /* Observe write marks */
   1141 
   1142 	while (parent) {
   1143 		/* if read wasn't screened by an earlier write ... */
   1144 		if (writes && state->live & REG_LIVE_WRITTEN)
   1145 			break;
   1146 		if (parent->live & REG_LIVE_DONE) {
   1147 			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
   1148 				reg_type_str[parent->type],
   1149 				parent->var_off.value, parent->off);
   1150 			return -EFAULT;
   1151 		}
   1152 		/* ... then we depend on parent's value */
   1153 		parent->live |= REG_LIVE_READ;
   1154 		state = parent;
   1155 		parent = state->parent;
   1156 		writes = true;
   1157 	}
   1158 	return 0;
   1159 }
   1160 
   1161 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
   1162 			 enum reg_arg_type t)
   1163 {
   1164 	struct bpf_verifier_state *vstate = env->cur_state;
   1165 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   1166 	struct bpf_reg_state *regs = state->regs;
   1167 
   1168 	if (regno >= MAX_BPF_REG) {
   1169 		verbose(env, "R%d is invalid\n", regno);
   1170 		return -EINVAL;
   1171 	}
   1172 
   1173 	if (t == SRC_OP) {
   1174 		/* check whether register used as source operand can be read */
   1175 		if (regs[regno].type == NOT_INIT) {
   1176 			verbose(env, "R%d !read_ok\n", regno);
   1177 			return -EACCES;
   1178 		}
   1179 		/* We don't need to worry about FP liveness because it's read-only */
   1180 		if (regno != BPF_REG_FP)
   1181 			return mark_reg_read(env, &regs[regno],
   1182 					     regs[regno].parent);
   1183 	} else {
   1184 		/* check whether register used as dest operand can be written to */
   1185 		if (regno == BPF_REG_FP) {
   1186 			verbose(env, "frame pointer is read only\n");
   1187 			return -EACCES;
   1188 		}
   1189 		regs[regno].live |= REG_LIVE_WRITTEN;
   1190 		if (t == DST_OP)
   1191 			mark_reg_unknown(env, regs, regno);
   1192 	}
   1193 	return 0;
   1194 }
   1195 
   1196 static bool is_spillable_regtype(enum bpf_reg_type type)
   1197 {
   1198 	switch (type) {
   1199 	case PTR_TO_MAP_VALUE:
   1200 	case PTR_TO_MAP_VALUE_OR_NULL:
   1201 	case PTR_TO_STACK:
   1202 	case PTR_TO_CTX:
   1203 	case PTR_TO_PACKET:
   1204 	case PTR_TO_PACKET_META:
   1205 	case PTR_TO_PACKET_END:
   1206 	case PTR_TO_FLOW_KEYS:
   1207 	case CONST_PTR_TO_MAP:
   1208 	case PTR_TO_SOCKET:
   1209 	case PTR_TO_SOCKET_OR_NULL:
   1210 	case PTR_TO_SOCK_COMMON:
   1211 	case PTR_TO_SOCK_COMMON_OR_NULL:
   1212 	case PTR_TO_TCP_SOCK:
   1213 	case PTR_TO_TCP_SOCK_OR_NULL:
   1214 		return true;
   1215 	default:
   1216 		return false;
   1217 	}
   1218 }
   1219 
   1220 /* Does this register contain a constant zero? */
   1221 static bool register_is_null(struct bpf_reg_state *reg)
   1222 {
   1223 	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
   1224 }
   1225 
   1226 /* check_stack_read/write functions track spill/fill of registers;
   1227  * stack boundary and alignment are checked in check_mem_access()
   1228  */
   1229 static int check_stack_write(struct bpf_verifier_env *env,
   1230 			     struct bpf_func_state *state, /* func where register points to */
   1231 			     int off, int size, int value_regno, int insn_idx)
   1232 {
   1233 	struct bpf_func_state *cur; /* state of the current function */
   1234 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
   1235 	enum bpf_reg_type type;
   1236 
   1237 	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
   1238 				 state->acquired_refs, true);
   1239 	if (err)
   1240 		return err;
   1241 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
   1242 	 * so it's aligned access and [off, off + size) are within stack limits
   1243 	 */
   1244 	if (!env->allow_ptr_leaks &&
   1245 	    state->stack[spi].slot_type[0] == STACK_SPILL &&
   1246 	    size != BPF_REG_SIZE) {
   1247 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
   1248 		return -EACCES;
   1249 	}
   1250 
   1251 	cur = env->cur_state->frame[env->cur_state->curframe];
   1252 	if (value_regno >= 0 &&
   1253 	    is_spillable_regtype((type = cur->regs[value_regno].type))) {
   1254 
   1255 		/* register containing pointer is being spilled into stack */
   1256 		if (size != BPF_REG_SIZE) {
   1257 			verbose(env, "invalid size of register spill\n");
   1258 			return -EACCES;
   1259 		}
   1260 
   1261 		if (state != cur && type == PTR_TO_STACK) {
   1262 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
   1263 			return -EINVAL;
   1264 		}
   1265 
   1266 		/* save register state */
   1267 		state->stack[spi].spilled_ptr = cur->regs[value_regno];
   1268 		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
   1269 
   1270 		for (i = 0; i < BPF_REG_SIZE; i++) {
   1271 			if (state->stack[spi].slot_type[i] == STACK_MISC &&
   1272 			    !env->allow_ptr_leaks) {
   1273 				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
   1274 				int soff = (-spi - 1) * BPF_REG_SIZE;
   1275 
   1276 				/* detected reuse of integer stack slot with a pointer
   1277 				 * which means either llvm is reusing stack slot or
   1278 				 * an attacker is trying to exploit CVE-2018-3639
   1279 				 * (speculative store bypass)
   1280 				 * Have to sanitize that slot with preemptive
   1281 				 * store of zero.
   1282 				 */
   1283 				if (*poff && *poff != soff) {
   1284 					/* disallow programs where single insn stores
   1285 					 * into two different stack slots, since verifier
   1286 					 * cannot sanitize them
   1287 					 */
   1288 					verbose(env,
   1289 						"insn %d cannot access two stack slots fp%d and fp%d",
   1290 						insn_idx, *poff, soff);
   1291 					return -EINVAL;
   1292 				}
   1293 				*poff = soff;
   1294 			}
   1295 			state->stack[spi].slot_type[i] = STACK_SPILL;
   1296 		}
   1297 	} else {
   1298 		u8 type = STACK_MISC;
   1299 
   1300 		/* regular write of data into stack destroys any spilled ptr */
   1301 		state->stack[spi].spilled_ptr.type = NOT_INIT;
   1302 		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
   1303 		if (state->stack[spi].slot_type[0] == STACK_SPILL)
   1304 			for (i = 0; i < BPF_REG_SIZE; i++)
   1305 				state->stack[spi].slot_type[i] = STACK_MISC;
   1306 
   1307 		/* only mark the slot as written if all 8 bytes were written
   1308 		 * otherwise read propagation may incorrectly stop too soon
   1309 		 * when stack slots are partially written.
   1310 		 * This heuristic means that read propagation will be
   1311 		 * conservative, since it will add reg_live_read marks
   1312 		 * to stack slots all the way to the first state when a program
   1313 		 * writes+reads less than 8 bytes
   1314 		 */
   1315 		if (size == BPF_REG_SIZE)
   1316 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
   1317 
   1318 		/* when we zero initialize stack slots mark them as such */
   1319 		if (value_regno >= 0 &&
   1320 		    register_is_null(&cur->regs[value_regno]))
   1321 			type = STACK_ZERO;
   1322 
   1323 		/* Mark slots affected by this stack write. */
   1324 		for (i = 0; i < size; i++)
   1325 			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
   1326 				type;
   1327 	}
   1328 	return 0;
   1329 }
   1330 
   1331 static int check_stack_read(struct bpf_verifier_env *env,
   1332 			    struct bpf_func_state *reg_state /* func where register points to */,
   1333 			    int off, int size, int value_regno)
   1334 {
   1335 	struct bpf_verifier_state *vstate = env->cur_state;
   1336 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   1337 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
   1338 	u8 *stype;
   1339 
   1340 	if (reg_state->allocated_stack <= slot) {
   1341 		verbose(env, "invalid read from stack off %d+0 size %d\n",
   1342 			off, size);
   1343 		return -EACCES;
   1344 	}
   1345 	stype = reg_state->stack[spi].slot_type;
   1346 
   1347 	if (stype[0] == STACK_SPILL) {
   1348 		if (size != BPF_REG_SIZE) {
   1349 			verbose(env, "invalid size of register spill\n");
   1350 			return -EACCES;
   1351 		}
   1352 		for (i = 1; i < BPF_REG_SIZE; i++) {
   1353 			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
   1354 				verbose(env, "corrupted spill memory\n");
   1355 				return -EACCES;
   1356 			}
   1357 		}
   1358 
   1359 		if (value_regno >= 0) {
   1360 			/* restore register state from stack */
   1361 			state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
   1362 			/* mark reg as written since spilled pointer state likely
   1363 			 * has its liveness marks cleared by is_state_visited()
   1364 			 * which resets stack/reg liveness for state transitions
   1365 			 */
   1366 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
   1367 		}
   1368 		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
   1369 			      reg_state->stack[spi].spilled_ptr.parent);
   1370 		return 0;
   1371 	} else {
   1372 		int zeros = 0;
   1373 
   1374 		for (i = 0; i < size; i++) {
   1375 			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
   1376 				continue;
   1377 			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
   1378 				zeros++;
   1379 				continue;
   1380 			}
   1381 			verbose(env, "invalid read from stack off %d+%d size %d\n",
   1382 				off, i, size);
   1383 			return -EACCES;
   1384 		}
   1385 		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
   1386 			      reg_state->stack[spi].spilled_ptr.parent);
   1387 		if (value_regno >= 0) {
   1388 			if (zeros == size) {
   1389 				/* any size read into register is zero extended,
   1390 				 * so the whole register == const_zero
   1391 				 */
   1392 				__mark_reg_const_zero(&state->regs[value_regno]);
   1393 			} else {
   1394 				/* have read misc data from the stack */
   1395 				mark_reg_unknown(env, state->regs, value_regno);
   1396 			}
   1397 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
   1398 		}
   1399 		return 0;
   1400 	}
   1401 }
   1402 
   1403 static int check_stack_access(struct bpf_verifier_env *env,
   1404 			      const struct bpf_reg_state *reg,
   1405 			      int off, int size)
   1406 {
   1407 	/* Stack accesses must be at a fixed offset, so that we
   1408 	 * can determine what type of data were returned. See
   1409 	 * check_stack_read().
   1410 	 */
   1411 	if (!tnum_is_const(reg->var_off)) {
   1412 		char tn_buf[48];
   1413 
   1414 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   1415 		verbose(env, "variable stack access var_off=%s off=%d size=%d",
   1416 			tn_buf, off, size);
   1417 		return -EACCES;
   1418 	}
   1419 
   1420 	if (off >= 0 || off < -MAX_BPF_STACK) {
   1421 		verbose(env, "invalid stack off=%d size=%d\n", off, size);
   1422 		return -EACCES;
   1423 	}
   1424 
   1425 	return 0;
   1426 }
   1427 
   1428 /* check read/write into map element returned by bpf_map_lookup_elem() */
   1429 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
   1430 			      int size, bool zero_size_allowed)
   1431 {
   1432 	struct bpf_reg_state *regs = cur_regs(env);
   1433 	struct bpf_map *map = regs[regno].map_ptr;
   1434 
   1435 	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
   1436 	    off + size > map->value_size) {
   1437 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
   1438 			map->value_size, off, size);
   1439 		return -EACCES;
   1440 	}
   1441 	return 0;
   1442 }
   1443 
   1444 /* check read/write into a map element with possible variable offset */
   1445 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
   1446 			    int off, int size, bool zero_size_allowed)
   1447 {
   1448 	struct bpf_verifier_state *vstate = env->cur_state;
   1449 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   1450 	struct bpf_reg_state *reg = &state->regs[regno];
   1451 	int err;
   1452 
   1453 	/* We may have adjusted the register to this map value, so we
   1454 	 * need to try adding each of min_value and max_value to off
   1455 	 * to make sure our theoretical access will be safe.
   1456 	 */
   1457 	if (env->log.level)
   1458 		print_verifier_state(env, state);
   1459 
   1460 	/* The minimum value is only important with signed
   1461 	 * comparisons where we can't assume the floor of a
   1462 	 * value is 0.  If we are using signed variables for our
   1463 	 * indexes we need to make sure that whatever we use
   1464 	 * will have a set floor within our range.
   1465 	 */
   1466 	if (reg->smin_value < 0 &&
   1467 	    (reg->smin_value == S64_MIN ||
   1468 	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
   1469 	      reg->smin_value + off < 0)) {
   1470 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
   1471 			regno);
   1472 		return -EACCES;
   1473 	}
   1474 	err = __check_map_access(env, regno, reg->smin_value + off, size,
   1475 				 zero_size_allowed);
   1476 	if (err) {
   1477 		verbose(env, "R%d min value is outside of the array range\n",
   1478 			regno);
   1479 		return err;
   1480 	}
   1481 
   1482 	/* If we haven't set a max value then we need to bail since we can't be
   1483 	 * sure we won't do bad things.
   1484 	 * If reg->umax_value + off could overflow, treat that as unbounded too.
   1485 	 */
   1486 	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
   1487 		verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
   1488 			regno);
   1489 		return -EACCES;
   1490 	}
   1491 	err = __check_map_access(env, regno, reg->umax_value + off, size,
   1492 				 zero_size_allowed);
   1493 	if (err)
   1494 		verbose(env, "R%d max value is outside of the array range\n",
   1495 			regno);
   1496 
   1497 	if (map_value_has_spin_lock(reg->map_ptr)) {
   1498 		u32 lock = reg->map_ptr->spin_lock_off;
   1499 
   1500 		/* if any part of struct bpf_spin_lock can be touched by
   1501 		 * load/store reject this program.
   1502 		 * To check that [x1, x2) overlaps with [y1, y2)
   1503 		 * it is sufficient to check x1 < y2 && y1 < x2.
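         		 * Here [x1, x2) is the access range
         		 * [smin_value + off, umax_value + off + size) and [y1, y2) is
         		 * [lock, lock + sizeof(struct bpf_spin_lock)), matching the
         		 * condition below.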
   1504 		 */
   1505 		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
   1506 		     lock < reg->umax_value + off + size) {
   1507 			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
   1508 			return -EACCES;
   1509 		}
   1510 	}
   1511 	return err;
   1512 }
   1513 
   1514 #define MAX_PACKET_OFF 0xffff
   1515 
   1516 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
   1517 				       const struct bpf_call_arg_meta *meta,
   1518 				       enum bpf_access_type t)
   1519 {
   1520 	switch (env->prog->type) {
    1521 	/* Program types with read-only direct packet access go here! */
   1522 	case BPF_PROG_TYPE_LWT_IN:
   1523 	case BPF_PROG_TYPE_LWT_OUT:
   1524 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
   1525 	case BPF_PROG_TYPE_SK_REUSEPORT:
   1526 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
   1527 	case BPF_PROG_TYPE_CGROUP_SKB:
   1528 		if (t == BPF_WRITE)
   1529 			return false;
   1530 		/* fallthrough */
   1531 
   1532 	/* Program types with direct read + write access go here! */
   1533 	case BPF_PROG_TYPE_SCHED_CLS:
   1534 	case BPF_PROG_TYPE_SCHED_ACT:
   1535 	case BPF_PROG_TYPE_XDP:
   1536 	case BPF_PROG_TYPE_LWT_XMIT:
   1537 	case BPF_PROG_TYPE_SK_SKB:
   1538 	case BPF_PROG_TYPE_SK_MSG:
   1539 		if (meta)
   1540 			return meta->pkt_access;
   1541 
   1542 		env->seen_direct_write = true;
   1543 		return true;
   1544 	default:
   1545 		return false;
   1546 	}
   1547 }
   1548 
   1549 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
   1550 				 int off, int size, bool zero_size_allowed)
   1551 {
   1552 	struct bpf_reg_state *regs = cur_regs(env);
   1553 	struct bpf_reg_state *reg = &regs[regno];
   1554 
   1555 	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
   1556 	    (u64)off + size > reg->range) {
   1557 		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
   1558 			off, size, regno, reg->id, reg->off, reg->range);
   1559 		return -EACCES;
   1560 	}
   1561 	return 0;
   1562 }
   1563 
   1564 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
   1565 			       int size, bool zero_size_allowed)
   1566 {
   1567 	struct bpf_reg_state *regs = cur_regs(env);
   1568 	struct bpf_reg_state *reg = &regs[regno];
   1569 	int err;
   1570 
   1571 	/* We may have added a variable offset to the packet pointer; but any
   1572 	 * reg->range we have comes after that.  We are only checking the fixed
   1573 	 * offset.
   1574 	 */
   1575 
   1576 	/* We don't allow negative numbers, because we aren't tracking enough
   1577 	 * detail to prove they're safe.
   1578 	 */
   1579 	if (reg->smin_value < 0) {
    1580 		verbose(env, "R%d min value is negative, either use unsigned index or do an if (index >= 0) check.\n",
   1581 			regno);
   1582 		return -EACCES;
   1583 	}
   1584 	err = __check_packet_access(env, regno, off, size, zero_size_allowed);
   1585 	if (err) {
   1586 		verbose(env, "R%d offset is outside of the packet\n", regno);
   1587 		return err;
   1588 	}
   1589 
   1590 	/* __check_packet_access has made sure "off + size - 1" is within u16.
   1591 	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
    1592 	 * otherwise find_good_pkt_pointers would have refused to set range info
    1593 	 * and __check_packet_access would have rejected this pkt access.
   1594 	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
   1595 	 */
   1596 	env->prog->aux->max_pkt_offset =
   1597 		max_t(u32, env->prog->aux->max_pkt_offset,
   1598 		      off + reg->umax_value + size - 1);
   1599 
   1600 	return err;
   1601 }
   1602 
   1603 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
   1604 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
   1605 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
   1606 {
   1607 	struct bpf_insn_access_aux info = {
   1608 		.reg_type = *reg_type,
   1609 	};
   1610 
   1611 	if (env->ops->is_valid_access &&
   1612 	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
   1613 		/* A non zero info.ctx_field_size indicates that this field is a
   1614 		 * candidate for later verifier transformation to load the whole
   1615 		 * field and then apply a mask when accessed with a narrower
   1616 		 * access than actual ctx access size. A zero info.ctx_field_size
   1617 		 * will only allow for whole field access and rejects any other
   1618 		 * type of narrower access.
   1619 		 */
   1620 		*reg_type = info.reg_type;
   1621 
   1622 		env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
   1623 		/* remember the offset of last byte accessed in ctx */
   1624 		if (env->prog->aux->max_ctx_offset < off + size)
   1625 			env->prog->aux->max_ctx_offset = off + size;
   1626 		return 0;
   1627 	}
   1628 
   1629 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
   1630 	return -EACCES;
   1631 }
   1632 
   1633 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
   1634 				  int size)
   1635 {
   1636 	if (size < 0 || off < 0 ||
   1637 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
   1638 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
   1639 			off, size);
   1640 		return -EACCES;
   1641 	}
   1642 	return 0;
   1643 }
   1644 
   1645 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
   1646 			     u32 regno, int off, int size,
   1647 			     enum bpf_access_type t)
   1648 {
   1649 	struct bpf_reg_state *regs = cur_regs(env);
   1650 	struct bpf_reg_state *reg = &regs[regno];
   1651 	struct bpf_insn_access_aux info = {};
   1652 	bool valid;
   1653 
   1654 	if (reg->smin_value < 0) {
    1655 		verbose(env, "R%d min value is negative, either use unsigned index or do an if (index >= 0) check.\n",
   1656 			regno);
   1657 		return -EACCES;
   1658 	}
   1659 
   1660 	switch (reg->type) {
   1661 	case PTR_TO_SOCK_COMMON:
   1662 		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
   1663 		break;
   1664 	case PTR_TO_SOCKET:
   1665 		valid = bpf_sock_is_valid_access(off, size, t, &info);
   1666 		break;
   1667 	case PTR_TO_TCP_SOCK:
   1668 		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
   1669 		break;
   1670 	default:
   1671 		valid = false;
   1672 	}
   1673 
   1674 
   1675 	if (valid) {
   1676 		env->insn_aux_data[insn_idx].ctx_field_size =
   1677 			info.ctx_field_size;
   1678 		return 0;
   1679 	}
   1680 
   1681 	verbose(env, "R%d invalid %s access off=%d size=%d\n",
   1682 		regno, reg_type_str[reg->type], off, size);
   1683 
   1684 	return -EACCES;
   1685 }
   1686 
   1687 static bool __is_pointer_value(bool allow_ptr_leaks,
   1688 			       const struct bpf_reg_state *reg)
   1689 {
   1690 	if (allow_ptr_leaks)
   1691 		return false;
   1692 
   1693 	return reg->type != SCALAR_VALUE;
   1694 }
   1695 
   1696 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
   1697 {
   1698 	return cur_regs(env) + regno;
   1699 }
   1700 
   1701 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
   1702 {
   1703 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
   1704 }
   1705 
   1706 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
   1707 {
   1708 	const struct bpf_reg_state *reg = reg_state(env, regno);
   1709 
   1710 	return reg->type == PTR_TO_CTX;
   1711 }
   1712 
   1713 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
   1714 {
   1715 	const struct bpf_reg_state *reg = reg_state(env, regno);
   1716 
   1717 	return type_is_sk_pointer(reg->type);
   1718 }
   1719 
   1720 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
   1721 {
   1722 	const struct bpf_reg_state *reg = reg_state(env, regno);
   1723 
   1724 	return type_is_pkt_pointer(reg->type);
   1725 }
   1726 
   1727 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
   1728 {
   1729 	const struct bpf_reg_state *reg = reg_state(env, regno);
   1730 
   1731 	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
   1732 	return reg->type == PTR_TO_FLOW_KEYS;
   1733 }
   1734 
   1735 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
   1736 				   const struct bpf_reg_state *reg,
   1737 				   int off, int size, bool strict)
   1738 {
   1739 	struct tnum reg_off;
   1740 	int ip_align;
   1741 
   1742 	/* Byte size accesses are always allowed. */
   1743 	if (!strict || size == 1)
   1744 		return 0;
   1745 
   1746 	/* For platforms that do not have a Kconfig enabling
   1747 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
   1748 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
   1749 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
   1750 	 * to this code only in strict mode where we want to emulate
   1751 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
   1752 	 * unconditional IP align value of '2'.
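         	 * For example, a 4-byte load of the IPv4 source address at
         	 * packet offset 14 + 12 = 26 checks 2 + 26 = 28, which is
         	 * 4-byte aligned.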
   1753 	 */
   1754 	ip_align = 2;
   1755 
   1756 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
   1757 	if (!tnum_is_aligned(reg_off, size)) {
   1758 		char tn_buf[48];
   1759 
   1760 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   1761 		verbose(env,
   1762 			"misaligned packet access off %d+%s+%d+%d size %d\n",
   1763 			ip_align, tn_buf, reg->off, off, size);
   1764 		return -EACCES;
   1765 	}
   1766 
   1767 	return 0;
   1768 }
   1769 
   1770 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
   1771 				       const struct bpf_reg_state *reg,
   1772 				       const char *pointer_desc,
   1773 				       int off, int size, bool strict)
   1774 {
   1775 	struct tnum reg_off;
   1776 
   1777 	/* Byte size accesses are always allowed. */
   1778 	if (!strict || size == 1)
   1779 		return 0;
   1780 
   1781 	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
   1782 	if (!tnum_is_aligned(reg_off, size)) {
   1783 		char tn_buf[48];
   1784 
   1785 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   1786 		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
   1787 			pointer_desc, tn_buf, reg->off, off, size);
   1788 		return -EACCES;
   1789 	}
   1790 
   1791 	return 0;
   1792 }
   1793 
   1794 static int check_ptr_alignment(struct bpf_verifier_env *env,
   1795 			       const struct bpf_reg_state *reg, int off,
   1796 			       int size, bool strict_alignment_once)
   1797 {
   1798 	bool strict = env->strict_alignment || strict_alignment_once;
   1799 	const char *pointer_desc = "";
   1800 
   1801 	switch (reg->type) {
   1802 	case PTR_TO_PACKET:
   1803 	case PTR_TO_PACKET_META:
   1804 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
   1805 		 * right in front, treat it the very same way.
   1806 		 */
   1807 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
   1808 	case PTR_TO_FLOW_KEYS:
   1809 		pointer_desc = "flow keys ";
   1810 		break;
   1811 	case PTR_TO_MAP_VALUE:
   1812 		pointer_desc = "value ";
   1813 		break;
   1814 	case PTR_TO_CTX:
   1815 		pointer_desc = "context ";
   1816 		break;
   1817 	case PTR_TO_STACK:
   1818 		pointer_desc = "stack ";
   1819 		/* The stack spill tracking logic in check_stack_write()
   1820 		 * and check_stack_read() relies on stack accesses being
   1821 		 * aligned.
   1822 		 */
   1823 		strict = true;
   1824 		break;
   1825 	case PTR_TO_SOCKET:
   1826 		pointer_desc = "sock ";
   1827 		break;
   1828 	case PTR_TO_SOCK_COMMON:
   1829 		pointer_desc = "sock_common ";
   1830 		break;
   1831 	case PTR_TO_TCP_SOCK:
   1832 		pointer_desc = "tcp_sock ";
   1833 		break;
   1834 	default:
   1835 		break;
   1836 	}
   1837 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
   1838 					   strict);
   1839 }
   1840 
   1841 static int update_stack_depth(struct bpf_verifier_env *env,
   1842 			      const struct bpf_func_state *func,
   1843 			      int off)
   1844 {
   1845 	u16 stack = env->subprog_info[func->subprogno].stack_depth;
   1846 
   1847 	if (stack >= -off)
   1848 		return 0;
   1849 
   1850 	/* update known max for given subprogram */
   1851 	env->subprog_info[func->subprogno].stack_depth = -off;
   1852 	return 0;
   1853 }
   1854 
   1855 /* starting from main bpf function walk all instructions of the function
    1856  * and recursively walk all callees that the given function can call.
   1857  * Ignore jump and exit insns.
   1858  * Since recursion is prevented by check_cfg() this algorithm
   1859  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
   1860  */
   1861 static int check_max_stack_depth(struct bpf_verifier_env *env)
   1862 {
   1863 	int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
   1864 	struct bpf_subprog_info *subprog = env->subprog_info;
   1865 	struct bpf_insn *insn = env->prog->insnsi;
   1866 	int ret_insn[MAX_CALL_FRAMES];
   1867 	int ret_prog[MAX_CALL_FRAMES];
   1868 
   1869 process_func:
    1870 	/* round up to 32 bytes, since this is the granularity
    1871 	 * of the interpreter stack size
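         	 * (e.g. a subprog with stack_depth 1 is charged 32 bytes here,
         	 * one with stack_depth 64 is charged 64)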
   1872 	 */
   1873 	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
   1874 	if (depth > MAX_BPF_STACK) {
   1875 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
   1876 			frame + 1, depth);
   1877 		return -EACCES;
   1878 	}
   1879 continue_func:
   1880 	subprog_end = subprog[idx + 1].start;
   1881 	for (; i < subprog_end; i++) {
   1882 		if (insn[i].code != (BPF_JMP | BPF_CALL))
   1883 			continue;
   1884 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
   1885 			continue;
   1886 		/* remember insn and function to return to */
   1887 		ret_insn[frame] = i + 1;
   1888 		ret_prog[frame] = idx;
   1889 
   1890 		/* find the callee */
   1891 		i = i + insn[i].imm + 1;
   1892 		idx = find_subprog(env, i);
   1893 		if (idx < 0) {
   1894 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
   1895 				  i);
   1896 			return -EFAULT;
   1897 		}
   1898 		frame++;
   1899 		if (frame >= MAX_CALL_FRAMES) {
    1900 			verbose(env, "the call stack of %d frames is too deep!\n",
   1901 				frame);
   1902 			return -E2BIG;
   1903 		}
   1904 		goto process_func;
   1905 	}
   1906 	/* end of for() loop means the last insn of the 'subprog'
   1907 	 * was reached. Doesn't matter whether it was JA or EXIT
   1908 	 */
   1909 	if (frame == 0)
   1910 		return 0;
   1911 	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
   1912 	frame--;
   1913 	i = ret_insn[frame];
   1914 	idx = ret_prog[frame];
   1915 	goto continue_func;
   1916 }
   1917 
   1918 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
   1919 static int get_callee_stack_depth(struct bpf_verifier_env *env,
   1920 				  const struct bpf_insn *insn, int idx)
   1921 {
   1922 	int start = idx + insn->imm + 1, subprog;
   1923 
   1924 	subprog = find_subprog(env, start);
   1925 	if (subprog < 0) {
   1926 		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
   1927 			  start);
   1928 		return -EFAULT;
   1929 	}
   1930 	return env->subprog_info[subprog].stack_depth;
   1931 }
   1932 #endif
   1933 
   1934 static int check_ctx_reg(struct bpf_verifier_env *env,
   1935 			 const struct bpf_reg_state *reg, int regno)
   1936 {
   1937 	/* Access to ctx or passing it to a helper is only allowed in
   1938 	 * its original, unmodified form.
   1939 	 */
   1940 
   1941 	if (reg->off) {
   1942 		verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
   1943 			regno, reg->off);
   1944 		return -EACCES;
   1945 	}
   1946 
   1947 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
   1948 		char tn_buf[48];
   1949 
   1950 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   1951 		verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
   1952 		return -EACCES;
   1953 	}
   1954 
   1955 	return 0;
   1956 }
   1957 
   1958 /* truncate register to smaller size (in bytes)
   1959  * must be called with size < BPF_REG_SIZE
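          * e.g. for size == 4 only the low 32 bits of var_off are kept; the
          * u64 bounds keep their low 32 bits if their upper bits agree and
          * otherwise widen to [0, U32_MAX]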
   1960  */
   1961 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
   1962 {
   1963 	u64 mask;
   1964 
   1965 	/* clear high bits in bit representation */
   1966 	reg->var_off = tnum_cast(reg->var_off, size);
   1967 
   1968 	/* fix arithmetic bounds */
   1969 	mask = ((u64)1 << (size * 8)) - 1;
   1970 	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
   1971 		reg->umin_value &= mask;
   1972 		reg->umax_value &= mask;
   1973 	} else {
   1974 		reg->umin_value = 0;
   1975 		reg->umax_value = mask;
   1976 	}
   1977 	reg->smin_value = reg->umin_value;
   1978 	reg->smax_value = reg->umax_value;
   1979 }
   1980 
   1981 /* check whether memory at (regno + off) is accessible for t = (read | write)
    1982  * if t==write, value_regno is a register whose value is stored into memory
   1983  * if t==read, value_regno is a register which will receive the value from memory
   1984  * if t==write && value_regno==-1, some unknown value is stored into memory
   1985  * if t==read && value_regno==-1, don't care what we read from memory
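          * e.g. a BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 16) load is checked
          * as a 4-byte BPF_READ at offset 16 with value_regno == BPF_REG_0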
   1986  */
   1987 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
   1988 			    int off, int bpf_size, enum bpf_access_type t,
   1989 			    int value_regno, bool strict_alignment_once)
   1990 {
   1991 	struct bpf_reg_state *regs = cur_regs(env);
   1992 	struct bpf_reg_state *reg = regs + regno;
   1993 	struct bpf_func_state *state;
   1994 	int size, err = 0;
   1995 
   1996 	size = bpf_size_to_bytes(bpf_size);
   1997 	if (size < 0)
   1998 		return size;
   1999 
   2000 	/* alignment checks will add in reg->off themselves */
   2001 	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
   2002 	if (err)
   2003 		return err;
   2004 
   2005 	/* for access checks, reg->off is just part of off */
   2006 	off += reg->off;
   2007 
   2008 	if (reg->type == PTR_TO_MAP_VALUE) {
   2009 		if (t == BPF_WRITE && value_regno >= 0 &&
   2010 		    is_pointer_value(env, value_regno)) {
   2011 			verbose(env, "R%d leaks addr into map\n", value_regno);
   2012 			return -EACCES;
   2013 		}
   2014 
   2015 		err = check_map_access(env, regno, off, size, false);
   2016 		if (!err && t == BPF_READ && value_regno >= 0)
   2017 			mark_reg_unknown(env, regs, value_regno);
   2018 
   2019 	} else if (reg->type == PTR_TO_CTX) {
   2020 		enum bpf_reg_type reg_type = SCALAR_VALUE;
   2021 
   2022 		if (t == BPF_WRITE && value_regno >= 0 &&
   2023 		    is_pointer_value(env, value_regno)) {
   2024 			verbose(env, "R%d leaks addr into ctx\n", value_regno);
   2025 			return -EACCES;
   2026 		}
   2027 
   2028 		err = check_ctx_reg(env, reg, regno);
   2029 		if (err < 0)
   2030 			return err;
   2031 
   2032 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
   2033 		if (!err && t == BPF_READ && value_regno >= 0) {
   2034 			/* ctx access returns either a scalar, or a
   2035 			 * PTR_TO_PACKET[_META,_END]. In the latter
   2036 			 * case, we know the offset is zero.
   2037 			 */
   2038 			if (reg_type == SCALAR_VALUE) {
   2039 				mark_reg_unknown(env, regs, value_regno);
   2040 			} else {
   2041 				mark_reg_known_zero(env, regs,
   2042 						    value_regno);
   2043 				if (reg_type_may_be_null(reg_type))
   2044 					regs[value_regno].id = ++env->id_gen;
   2045 			}
   2046 			regs[value_regno].type = reg_type;
   2047 		}
   2048 
   2049 	} else if (reg->type == PTR_TO_STACK) {
   2050 		off += reg->var_off.value;
   2051 		err = check_stack_access(env, reg, off, size);
   2052 		if (err)
   2053 			return err;
   2054 
   2055 		state = func(env, reg);
   2056 		err = update_stack_depth(env, state, off);
   2057 		if (err)
   2058 			return err;
   2059 
   2060 		if (t == BPF_WRITE)
   2061 			err = check_stack_write(env, state, off, size,
   2062 						value_regno, insn_idx);
   2063 		else
   2064 			err = check_stack_read(env, state, off, size,
   2065 					       value_regno);
   2066 	} else if (reg_is_pkt_pointer(reg)) {
   2067 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
   2068 			verbose(env, "cannot write into packet\n");
   2069 			return -EACCES;
   2070 		}
   2071 		if (t == BPF_WRITE && value_regno >= 0 &&
   2072 		    is_pointer_value(env, value_regno)) {
   2073 			verbose(env, "R%d leaks addr into packet\n",
   2074 				value_regno);
   2075 			return -EACCES;
   2076 		}
   2077 		err = check_packet_access(env, regno, off, size, false);
   2078 		if (!err && t == BPF_READ && value_regno >= 0)
   2079 			mark_reg_unknown(env, regs, value_regno);
   2080 	} else if (reg->type == PTR_TO_FLOW_KEYS) {
   2081 		if (t == BPF_WRITE && value_regno >= 0 &&
   2082 		    is_pointer_value(env, value_regno)) {
   2083 			verbose(env, "R%d leaks addr into flow keys\n",
   2084 				value_regno);
   2085 			return -EACCES;
   2086 		}
   2087 
   2088 		err = check_flow_keys_access(env, off, size);
   2089 		if (!err && t == BPF_READ && value_regno >= 0)
   2090 			mark_reg_unknown(env, regs, value_regno);
   2091 	} else if (type_is_sk_pointer(reg->type)) {
   2092 		if (t == BPF_WRITE) {
   2093 			verbose(env, "R%d cannot write into %s\n",
   2094 				regno, reg_type_str[reg->type]);
   2095 			return -EACCES;
   2096 		}
   2097 		err = check_sock_access(env, insn_idx, regno, off, size, t);
   2098 		if (!err && value_regno >= 0)
   2099 			mark_reg_unknown(env, regs, value_regno);
   2100 	} else {
   2101 		verbose(env, "R%d invalid mem access '%s'\n", regno,
   2102 			reg_type_str[reg->type]);
   2103 		return -EACCES;
   2104 	}
   2105 
   2106 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
   2107 	    regs[value_regno].type == SCALAR_VALUE) {
   2108 		/* b/h/w load zero-extends, mark upper bits as known 0 */
   2109 		coerce_reg_to_size(&regs[value_regno], size);
   2110 	}
   2111 	return err;
   2112 }
   2113 
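         /* BPF_XADD atomically does *(u32|u64 *)(dst_reg + off) += src_reg, so the
          * destination has to be ordinary readable and writable memory; ctx,
          * packet, flow-keys and socket pointers are rejected below.
          */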
   2114 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
   2115 {
   2116 	int err;
   2117 
   2118 	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
   2119 	    insn->imm != 0) {
   2120 		verbose(env, "BPF_XADD uses reserved fields\n");
   2121 		return -EINVAL;
   2122 	}
   2123 
   2124 	/* check src1 operand */
   2125 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
   2126 	if (err)
   2127 		return err;
   2128 
   2129 	/* check src2 operand */
   2130 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
   2131 	if (err)
   2132 		return err;
   2133 
   2134 	if (is_pointer_value(env, insn->src_reg)) {
   2135 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
   2136 		return -EACCES;
   2137 	}
   2138 
   2139 	if (is_ctx_reg(env, insn->dst_reg) ||
   2140 	    is_pkt_reg(env, insn->dst_reg) ||
   2141 	    is_flow_key_reg(env, insn->dst_reg) ||
   2142 	    is_sk_reg(env, insn->dst_reg)) {
   2143 		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
   2144 			insn->dst_reg,
   2145 			reg_type_str[reg_state(env, insn->dst_reg)->type]);
   2146 		return -EACCES;
   2147 	}
   2148 
   2149 	/* check whether atomic_add can read the memory */
   2150 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
   2151 			       BPF_SIZE(insn->code), BPF_READ, -1, true);
   2152 	if (err)
   2153 		return err;
   2154 
   2155 	/* check whether atomic_add can write into the same memory */
   2156 	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
   2157 				BPF_SIZE(insn->code), BPF_WRITE, -1, true);
   2158 }
   2159 
   2160 /* when register 'regno' is passed into function that will read 'access_size'
   2161  * bytes from that pointer, make sure that it's within stack boundary
   2162  * and all elements of stack are initialized.
   2163  * Unlike most pointer bounds-checking functions, this one doesn't take an
   2164  * 'off' argument, so it has to add in reg->off itself.
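          * e.g. when a stack pointer is passed as the key to
          * bpf_map_lookup_elem(), every byte of [key, key + map->key_size)
          * must lie within the current frame and have been written before
          * the call (unless the helper runs in raw_mode).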
   2165  */
   2166 static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
   2167 				int access_size, bool zero_size_allowed,
   2168 				struct bpf_call_arg_meta *meta)
   2169 {
   2170 	struct bpf_reg_state *reg = reg_state(env, regno);
   2171 	struct bpf_func_state *state = func(env, reg);
   2172 	int off, i, slot, spi;
   2173 
   2174 	if (reg->type != PTR_TO_STACK) {
   2175 		/* Allow zero-byte read from NULL, regardless of pointer type */
   2176 		if (zero_size_allowed && access_size == 0 &&
   2177 		    register_is_null(reg))
   2178 			return 0;
   2179 
   2180 		verbose(env, "R%d type=%s expected=%s\n", regno,
   2181 			reg_type_str[reg->type],
   2182 			reg_type_str[PTR_TO_STACK]);
   2183 		return -EACCES;
   2184 	}
   2185 
   2186 	/* Only allow fixed-offset stack reads */
   2187 	if (!tnum_is_const(reg->var_off)) {
   2188 		char tn_buf[48];
   2189 
   2190 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   2191 		verbose(env, "invalid variable stack read R%d var_off=%s\n",
   2192 			regno, tn_buf);
   2193 		return -EACCES;
   2194 	}
   2195 	off = reg->off + reg->var_off.value;
   2196 	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
   2197 	    access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
   2198 		verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
   2199 			regno, off, access_size);
   2200 		return -EACCES;
   2201 	}
   2202 
   2203 	if (meta && meta->raw_mode) {
   2204 		meta->access_size = access_size;
   2205 		meta->regno = regno;
   2206 		return 0;
   2207 	}
   2208 
   2209 	for (i = 0; i < access_size; i++) {
   2210 		u8 *stype;
   2211 
   2212 		slot = -(off + i) - 1;
   2213 		spi = slot / BPF_REG_SIZE;
   2214 		if (state->allocated_stack <= slot)
   2215 			goto err;
   2216 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
   2217 		if (*stype == STACK_MISC)
   2218 			goto mark;
   2219 		if (*stype == STACK_ZERO) {
   2220 			/* helper can write anything into the stack */
   2221 			*stype = STACK_MISC;
   2222 			goto mark;
   2223 		}
   2224 err:
   2225 		verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
   2226 			off, i, access_size);
   2227 		return -EACCES;
   2228 mark:
   2229 		/* reading any byte out of 8-byte 'spill_slot' will cause
   2230 		 * the whole slot to be marked as 'read'
   2231 		 */
   2232 		mark_reg_read(env, &state->stack[spi].spilled_ptr,
   2233 			      state->stack[spi].spilled_ptr.parent);
   2234 	}
   2235 	return update_stack_depth(env, state, off);
   2236 }
   2237 
   2238 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
   2239 				   int access_size, bool zero_size_allowed,
   2240 				   struct bpf_call_arg_meta *meta)
   2241 {
   2242 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
   2243 
   2244 	switch (reg->type) {
   2245 	case PTR_TO_PACKET:
   2246 	case PTR_TO_PACKET_META:
   2247 		return check_packet_access(env, regno, reg->off, access_size,
   2248 					   zero_size_allowed);
   2249 	case PTR_TO_MAP_VALUE:
   2250 		return check_map_access(env, regno, reg->off, access_size,
   2251 					zero_size_allowed);
   2252 	default: /* scalar_value|ptr_to_stack or invalid ptr */
   2253 		return check_stack_boundary(env, regno, access_size,
   2254 					    zero_size_allowed, meta);
   2255 	}
   2256 }
   2257 
   2258 /* Implementation details:
   2259  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
   2260  * Two bpf_map_lookups (even with the same key) will have different reg->id.
   2261  * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
   2262  * value_or_null->value transition, since the verifier only cares about
   2263  * the range of access to valid map value pointer and doesn't care about actual
   2264  * address of the map element.
   2265  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
   2266  * reg->id > 0 after value_or_null->value transition. By doing so
   2267  * two bpf_map_lookups will be considered two different pointers that
   2268  * point to different bpf_spin_locks.
   2269  * The verifier allows taking only one bpf_spin_lock at a time to avoid
   2270  * dead-locks.
   2271  * Since only one bpf_spin_lock is allowed the checks are simpler than
   2272  * reg_is_refcounted() logic. The verifier needs to remember only
   2273  * one spin_lock instead of array of acquired_refs.
   2274  * cur_state->active_spin_lock remembers which map value element got locked
   2275  * and clears it after bpf_spin_unlock.
   2276  */
   2277 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
   2278 			     bool is_lock)
   2279 {
   2280 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
   2281 	struct bpf_verifier_state *cur = env->cur_state;
   2282 	bool is_const = tnum_is_const(reg->var_off);
   2283 	struct bpf_map *map = reg->map_ptr;
   2284 	u64 val = reg->var_off.value;
   2285 
   2286 	if (reg->type != PTR_TO_MAP_VALUE) {
   2287 		verbose(env, "R%d is not a pointer to map_value\n", regno);
   2288 		return -EINVAL;
   2289 	}
   2290 	if (!is_const) {
   2291 		verbose(env,
    2292 			"R%d doesn't have a constant offset. bpf_spin_lock has to be at a constant offset\n",
   2293 			regno);
   2294 		return -EINVAL;
   2295 	}
   2296 	if (!map->btf) {
   2297 		verbose(env,
   2298 			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
   2299 			map->name);
   2300 		return -EINVAL;
   2301 	}
   2302 	if (!map_value_has_spin_lock(map)) {
   2303 		if (map->spin_lock_off == -E2BIG)
   2304 			verbose(env,
   2305 				"map '%s' has more than one 'struct bpf_spin_lock'\n",
   2306 				map->name);
   2307 		else if (map->spin_lock_off == -ENOENT)
   2308 			verbose(env,
   2309 				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
   2310 				map->name);
   2311 		else
   2312 			verbose(env,
   2313 				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
   2314 				map->name);
   2315 		return -EINVAL;
   2316 	}
   2317 	if (map->spin_lock_off != val + reg->off) {
   2318 		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
   2319 			val + reg->off);
   2320 		return -EINVAL;
   2321 	}
   2322 	if (is_lock) {
   2323 		if (cur->active_spin_lock) {
   2324 			verbose(env,
    2325 				"Locking two bpf_spin_locks is not allowed\n");
   2326 			return -EINVAL;
   2327 		}
   2328 		cur->active_spin_lock = reg->id;
   2329 	} else {
   2330 		if (!cur->active_spin_lock) {
   2331 			verbose(env, "bpf_spin_unlock without taking a lock\n");
   2332 			return -EINVAL;
   2333 		}
   2334 		if (cur->active_spin_lock != reg->id) {
   2335 			verbose(env, "bpf_spin_unlock of different lock\n");
   2336 			return -EINVAL;
   2337 		}
   2338 		cur->active_spin_lock = 0;
   2339 	}
   2340 	return 0;
   2341 }
   2342 
   2343 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
   2344 {
   2345 	return type == ARG_PTR_TO_MEM ||
   2346 	       type == ARG_PTR_TO_MEM_OR_NULL ||
   2347 	       type == ARG_PTR_TO_UNINIT_MEM;
   2348 }
   2349 
   2350 static bool arg_type_is_mem_size(enum bpf_arg_type type)
   2351 {
   2352 	return type == ARG_CONST_SIZE ||
   2353 	       type == ARG_CONST_SIZE_OR_ZERO;
   2354 }
   2355 
   2356 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
   2357 			  enum bpf_arg_type arg_type,
   2358 			  struct bpf_call_arg_meta *meta)
   2359 {
   2360 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
   2361 	enum bpf_reg_type expected_type, type = reg->type;
   2362 	int err = 0;
   2363 
   2364 	if (arg_type == ARG_DONTCARE)
   2365 		return 0;
   2366 
   2367 	err = check_reg_arg(env, regno, SRC_OP);
   2368 	if (err)
   2369 		return err;
   2370 
   2371 	if (arg_type == ARG_ANYTHING) {
   2372 		if (is_pointer_value(env, regno)) {
   2373 			verbose(env, "R%d leaks addr into helper function\n",
   2374 				regno);
   2375 			return -EACCES;
   2376 		}
   2377 		return 0;
   2378 	}
   2379 
   2380 	if (type_is_pkt_pointer(type) &&
   2381 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
   2382 		verbose(env, "helper access to the packet is not allowed\n");
   2383 		return -EACCES;
   2384 	}
   2385 
   2386 	if (arg_type == ARG_PTR_TO_MAP_KEY ||
   2387 	    arg_type == ARG_PTR_TO_MAP_VALUE ||
   2388 	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
   2389 		expected_type = PTR_TO_STACK;
   2390 		if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
   2391 		    type != expected_type)
   2392 			goto err_type;
   2393 	} else if (arg_type == ARG_CONST_SIZE ||
   2394 		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
   2395 		expected_type = SCALAR_VALUE;
   2396 		if (type != expected_type)
   2397 			goto err_type;
   2398 	} else if (arg_type == ARG_CONST_MAP_PTR) {
   2399 		expected_type = CONST_PTR_TO_MAP;
   2400 		if (type != expected_type)
   2401 			goto err_type;
   2402 	} else if (arg_type == ARG_PTR_TO_CTX) {
   2403 		expected_type = PTR_TO_CTX;
   2404 		if (type != expected_type)
   2405 			goto err_type;
   2406 		err = check_ctx_reg(env, reg, regno);
   2407 		if (err < 0)
   2408 			return err;
   2409 	} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
   2410 		expected_type = PTR_TO_SOCK_COMMON;
   2411 		/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
   2412 		if (!type_is_sk_pointer(type))
   2413 			goto err_type;
   2414 		if (reg->ref_obj_id) {
   2415 			if (meta->ref_obj_id) {
   2416 				verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
   2417 					regno, reg->ref_obj_id,
   2418 					meta->ref_obj_id);
   2419 				return -EFAULT;
   2420 			}
   2421 			meta->ref_obj_id = reg->ref_obj_id;
   2422 		}
   2423 	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
   2424 		if (meta->func_id == BPF_FUNC_spin_lock) {
   2425 			if (process_spin_lock(env, regno, true))
   2426 				return -EACCES;
   2427 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
   2428 			if (process_spin_lock(env, regno, false))
   2429 				return -EACCES;
   2430 		} else {
   2431 			verbose(env, "verifier internal error\n");
   2432 			return -EFAULT;
   2433 		}
   2434 	} else if (arg_type_is_mem_ptr(arg_type)) {
   2435 		expected_type = PTR_TO_STACK;
   2436 		/* One exception here. In case function allows for NULL to be
   2437 		 * passed in as argument, it's a SCALAR_VALUE type. Final test
   2438 		 * happens during stack boundary checking.
   2439 		 */
   2440 		if (register_is_null(reg) &&
   2441 		    arg_type == ARG_PTR_TO_MEM_OR_NULL)
   2442 			/* final test in check_stack_boundary() */;
   2443 		else if (!type_is_pkt_pointer(type) &&
   2444 			 type != PTR_TO_MAP_VALUE &&
   2445 			 type != expected_type)
   2446 			goto err_type;
   2447 		meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
   2448 	} else {
   2449 		verbose(env, "unsupported arg_type %d\n", arg_type);
   2450 		return -EFAULT;
   2451 	}
   2452 
   2453 	if (arg_type == ARG_CONST_MAP_PTR) {
   2454 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
   2455 		meta->map_ptr = reg->map_ptr;
   2456 	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
   2457 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
   2458 		 * check that [key, key + map->key_size) are within
   2459 		 * stack limits and initialized
   2460 		 */
   2461 		if (!meta->map_ptr) {
   2462 			/* in function declaration map_ptr must come before
   2463 			 * map_key, so that it's verified and known before
   2464 			 * we have to check map_key here. Otherwise it means
    2465 			 * that the kernel subsystem misconfigured the verifier
   2466 			 */
   2467 			verbose(env, "invalid map_ptr to access map->key\n");
   2468 			return -EACCES;
   2469 		}
   2470 		err = check_helper_mem_access(env, regno,
   2471 					      meta->map_ptr->key_size, false,
   2472 					      NULL);
   2473 	} else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
   2474 		   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
   2475 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
   2476 		 * check [value, value + map->value_size) validity
   2477 		 */
   2478 		if (!meta->map_ptr) {
   2479 			/* kernel subsystem misconfigured verifier */
   2480 			verbose(env, "invalid map_ptr to access map->value\n");
   2481 			return -EACCES;
   2482 		}
   2483 		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
   2484 		err = check_helper_mem_access(env, regno,
   2485 					      meta->map_ptr->value_size, false,
   2486 					      meta);
   2487 	} else if (arg_type_is_mem_size(arg_type)) {
   2488 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
   2489 
   2490 		/* remember the mem_size which may be used later
   2491 		 * to refine return values.
   2492 		 */
   2493 		meta->msize_smax_value = reg->smax_value;
   2494 		meta->msize_umax_value = reg->umax_value;
   2495 
   2496 		/* The register is SCALAR_VALUE; the access check
   2497 		 * happens using its boundaries.
   2498 		 */
   2499 		if (!tnum_is_const(reg->var_off))
   2500 			/* For unprivileged variable accesses, disable raw
   2501 			 * mode so that the program is required to
   2502 			 * initialize all the memory that the helper could
   2503 			 * just partially fill up.
   2504 			 */
   2505 			meta = NULL;
   2506 
   2507 		if (reg->smin_value < 0) {
   2508 			verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
   2509 				regno);
   2510 			return -EACCES;
   2511 		}
   2512 
   2513 		if (reg->umin_value == 0) {
   2514 			err = check_helper_mem_access(env, regno - 1, 0,
   2515 						      zero_size_allowed,
   2516 						      meta);
   2517 			if (err)
   2518 				return err;
   2519 		}
   2520 
   2521 		if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
   2522 			verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
   2523 				regno);
   2524 			return -EACCES;
   2525 		}
   2526 		err = check_helper_mem_access(env, regno - 1,
   2527 					      reg->umax_value,
   2528 					      zero_size_allowed, meta);
   2529 	}
   2530 
   2531 	return err;
   2532 err_type:
   2533 	verbose(env, "R%d type=%s expected=%s\n", regno,
   2534 		reg_type_str[type], reg_type_str[expected_type]);
   2535 	return -EACCES;
   2536 }
   2537 
   2538 static int check_map_func_compatibility(struct bpf_verifier_env *env,
   2539 					struct bpf_map *map, int func_id)
   2540 {
   2541 	if (!map)
   2542 		return 0;
   2543 
   2544 	/* We need a two way check, first is from map perspective ... */
   2545 	switch (map->map_type) {
   2546 	case BPF_MAP_TYPE_PROG_ARRAY:
   2547 		if (func_id != BPF_FUNC_tail_call)
   2548 			goto error;
   2549 		break;
   2550 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
   2551 		if (func_id != BPF_FUNC_perf_event_read &&
   2552 		    func_id != BPF_FUNC_perf_event_output &&
   2553 		    func_id != BPF_FUNC_perf_event_read_value)
   2554 			goto error;
   2555 		break;
   2556 	case BPF_MAP_TYPE_STACK_TRACE:
   2557 		if (func_id != BPF_FUNC_get_stackid)
   2558 			goto error;
   2559 		break;
   2560 	case BPF_MAP_TYPE_CGROUP_ARRAY:
   2561 		if (func_id != BPF_FUNC_skb_under_cgroup &&
   2562 		    func_id != BPF_FUNC_current_task_under_cgroup)
   2563 			goto error;
   2564 		break;
   2565 	case BPF_MAP_TYPE_CGROUP_STORAGE:
   2566 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
   2567 		if (func_id != BPF_FUNC_get_local_storage)
   2568 			goto error;
   2569 		break;
   2570 	/* devmap returns a pointer to a live net_device ifindex that we cannot
    2571 	 * allow to be modified from bpf side. So do not allow lookups of elements
   2572 	 * for now.
   2573 	 */
   2574 	case BPF_MAP_TYPE_DEVMAP:
   2575 		if (func_id != BPF_FUNC_redirect_map)
   2576 			goto error;
   2577 		break;
   2578 	/* Restrict bpf side of cpumap and xskmap, open when use-cases
   2579 	 * appear.
   2580 	 */
   2581 	case BPF_MAP_TYPE_CPUMAP:
   2582 	case BPF_MAP_TYPE_XSKMAP:
   2583 		if (func_id != BPF_FUNC_redirect_map)
   2584 			goto error;
   2585 		break;
   2586 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
   2587 	case BPF_MAP_TYPE_HASH_OF_MAPS:
   2588 		if (func_id != BPF_FUNC_map_lookup_elem)
   2589 			goto error;
   2590 		break;
   2591 	case BPF_MAP_TYPE_SOCKMAP:
   2592 		if (func_id != BPF_FUNC_sk_redirect_map &&
   2593 		    func_id != BPF_FUNC_sock_map_update &&
   2594 		    func_id != BPF_FUNC_map_delete_elem &&
   2595 		    func_id != BPF_FUNC_msg_redirect_map)
   2596 			goto error;
   2597 		break;
   2598 	case BPF_MAP_TYPE_SOCKHASH:
   2599 		if (func_id != BPF_FUNC_sk_redirect_hash &&
   2600 		    func_id != BPF_FUNC_sock_hash_update &&
   2601 		    func_id != BPF_FUNC_map_delete_elem &&
   2602 		    func_id != BPF_FUNC_msg_redirect_hash)
   2603 			goto error;
   2604 		break;
   2605 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
   2606 		if (func_id != BPF_FUNC_sk_select_reuseport)
   2607 			goto error;
   2608 		break;
   2609 	case BPF_MAP_TYPE_QUEUE:
   2610 	case BPF_MAP_TYPE_STACK:
   2611 		if (func_id != BPF_FUNC_map_peek_elem &&
   2612 		    func_id != BPF_FUNC_map_pop_elem &&
   2613 		    func_id != BPF_FUNC_map_push_elem)
   2614 			goto error;
   2615 		break;
   2616 	default:
   2617 		break;
   2618 	}
   2619 
   2620 	/* ... and second from the function itself. */
   2621 	switch (func_id) {
   2622 	case BPF_FUNC_tail_call:
   2623 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
   2624 			goto error;
   2625 		if (env->subprog_cnt > 1) {
   2626 			verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
   2627 			return -EINVAL;
   2628 		}
   2629 		break;
   2630 	case BPF_FUNC_perf_event_read:
   2631 	case BPF_FUNC_perf_event_output:
   2632 	case BPF_FUNC_perf_event_read_value:
   2633 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
   2634 			goto error;
   2635 		break;
   2636 	case BPF_FUNC_get_stackid:
   2637 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
   2638 			goto error;
   2639 		break;
   2640 	case BPF_FUNC_current_task_under_cgroup:
   2641 	case BPF_FUNC_skb_under_cgroup:
   2642 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
   2643 			goto error;
   2644 		break;
   2645 	case BPF_FUNC_redirect_map:
   2646 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
   2647 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
   2648 		    map->map_type != BPF_MAP_TYPE_XSKMAP)
   2649 			goto error;
   2650 		break;
   2651 	case BPF_FUNC_sk_redirect_map:
   2652 	case BPF_FUNC_msg_redirect_map:
   2653 	case BPF_FUNC_sock_map_update:
   2654 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
   2655 			goto error;
   2656 		break;
   2657 	case BPF_FUNC_sk_redirect_hash:
   2658 	case BPF_FUNC_msg_redirect_hash:
   2659 	case BPF_FUNC_sock_hash_update:
   2660 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
   2661 			goto error;
   2662 		break;
   2663 	case BPF_FUNC_get_local_storage:
   2664 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
   2665 		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
   2666 			goto error;
   2667 		break;
   2668 	case BPF_FUNC_sk_select_reuseport:
   2669 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
   2670 			goto error;
   2671 		break;
   2672 	case BPF_FUNC_map_peek_elem:
   2673 	case BPF_FUNC_map_pop_elem:
   2674 	case BPF_FUNC_map_push_elem:
   2675 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
   2676 		    map->map_type != BPF_MAP_TYPE_STACK)
   2677 			goto error;
   2678 		break;
   2679 	default:
   2680 		break;
   2681 	}
   2682 
   2683 	return 0;
   2684 error:
   2685 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
   2686 		map->map_type, func_id_name(func_id), func_id);
   2687 	return -EINVAL;
   2688 }
   2689 
   2690 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
   2691 {
   2692 	int count = 0;
   2693 
   2694 	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
   2695 		count++;
   2696 	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
   2697 		count++;
   2698 	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
   2699 		count++;
   2700 	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
   2701 		count++;
   2702 	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
   2703 		count++;
   2704 
   2705 	/* We only support one arg being in raw mode at the moment,
   2706 	 * which is sufficient for the helper functions we have
   2707 	 * right now.
   2708 	 */
   2709 	return count <= 1;
   2710 }
   2711 
   2712 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
   2713 				    enum bpf_arg_type arg_next)
   2714 {
   2715 	return (arg_type_is_mem_ptr(arg_curr) &&
   2716 	        !arg_type_is_mem_size(arg_next)) ||
   2717 	       (!arg_type_is_mem_ptr(arg_curr) &&
   2718 		arg_type_is_mem_size(arg_next));
   2719 }
   2720 
   2721 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
   2722 {
   2723 	/* bpf_xxx(..., buf, len) call will access 'len'
   2724 	 * bytes from memory 'buf'. Both arg types need
   2725 	 * to be paired, so make sure there's no buggy
   2726 	 * helper function specification.
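         	 * e.g. a helper like bpf_probe_read() pairs an
         	 * ARG_PTR_TO_UNINIT_MEM buffer with an ARG_CONST_SIZE length.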
   2727 	 */
   2728 	if (arg_type_is_mem_size(fn->arg1_type) ||
   2729 	    arg_type_is_mem_ptr(fn->arg5_type)  ||
   2730 	    check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
   2731 	    check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
   2732 	    check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
   2733 	    check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
   2734 		return false;
   2735 
   2736 	return true;
   2737 }
   2738 
   2739 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
   2740 {
   2741 	int count = 0;
   2742 
   2743 	if (arg_type_may_be_refcounted(fn->arg1_type))
   2744 		count++;
   2745 	if (arg_type_may_be_refcounted(fn->arg2_type))
   2746 		count++;
   2747 	if (arg_type_may_be_refcounted(fn->arg3_type))
   2748 		count++;
   2749 	if (arg_type_may_be_refcounted(fn->arg4_type))
   2750 		count++;
   2751 	if (arg_type_may_be_refcounted(fn->arg5_type))
   2752 		count++;
   2753 
   2754 	/* A reference acquiring function cannot acquire
   2755 	 * another refcounted ptr.
   2756 	 */
   2757 	if (is_acquire_function(func_id) && count)
   2758 		return false;
   2759 
   2760 	/* We only support one arg being unreferenced at the moment,
   2761 	 * which is sufficient for the helper functions we have right now.
   2762 	 */
   2763 	return count <= 1;
   2764 }
   2765 
   2766 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
   2767 {
   2768 	return check_raw_mode_ok(fn) &&
   2769 	       check_arg_pair_ok(fn) &&
   2770 	       check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
   2771 }
   2772 
   2773 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
   2774  * are now invalid, so turn them into unknown SCALAR_VALUE.
   2775  */
   2776 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
   2777 				     struct bpf_func_state *state)
   2778 {
   2779 	struct bpf_reg_state *regs = state->regs, *reg;
   2780 	int i;
   2781 
   2782 	for (i = 0; i < MAX_BPF_REG; i++)
   2783 		if (reg_is_pkt_pointer_any(&regs[i]))
   2784 			mark_reg_unknown(env, regs, i);
   2785 
   2786 	bpf_for_each_spilled_reg(i, state, reg) {
   2787 		if (!reg)
   2788 			continue;
   2789 		if (reg_is_pkt_pointer_any(reg))
   2790 			__mark_reg_unknown(reg);
   2791 	}
   2792 }
   2793 
   2794 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
   2795 {
   2796 	struct bpf_verifier_state *vstate = env->cur_state;
   2797 	int i;
   2798 
   2799 	for (i = 0; i <= vstate->curframe; i++)
   2800 		__clear_all_pkt_pointers(env, vstate->frame[i]);
   2801 }
   2802 
   2803 static void release_reg_references(struct bpf_verifier_env *env,
   2804 				   struct bpf_func_state *state,
   2805 				   int ref_obj_id)
   2806 {
   2807 	struct bpf_reg_state *regs = state->regs, *reg;
   2808 	int i;
   2809 
   2810 	for (i = 0; i < MAX_BPF_REG; i++)
   2811 		if (regs[i].ref_obj_id == ref_obj_id)
   2812 			mark_reg_unknown(env, regs, i);
   2813 
   2814 	bpf_for_each_spilled_reg(i, state, reg) {
   2815 		if (!reg)
   2816 			continue;
   2817 		if (reg->ref_obj_id == ref_obj_id)
   2818 			__mark_reg_unknown(reg);
   2819 	}
   2820 }
   2821 
   2822 /* The pointer with the specified id has released its reference to kernel
   2823  * resources. Identify all copies of the same pointer and clear the reference.
   2824  */
   2825 static int release_reference(struct bpf_verifier_env *env,
   2826 			     int ref_obj_id)
   2827 {
   2828 	struct bpf_verifier_state *vstate = env->cur_state;
   2829 	int err;
   2830 	int i;
   2831 
   2832 	err = release_reference_state(cur_func(env), ref_obj_id);
   2833 	if (err)
   2834 		return err;
   2835 
   2836 	for (i = 0; i <= vstate->curframe; i++)
   2837 		release_reg_references(env, vstate->frame[i], ref_obj_id);
   2838 
   2839 	return 0;
   2840 }
   2841 
   2842 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
   2843 			   int *insn_idx)
   2844 {
   2845 	struct bpf_verifier_state *state = env->cur_state;
   2846 	struct bpf_func_state *caller, *callee;
   2847 	int i, err, subprog, target_insn;
   2848 
   2849 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
   2850 		verbose(env, "the call stack of %d frames is too deep\n",
   2851 			state->curframe + 2);
   2852 		return -E2BIG;
   2853 	}
   2854 
   2855 	target_insn = *insn_idx + insn->imm;
   2856 	subprog = find_subprog(env, target_insn + 1);
   2857 	if (subprog < 0) {
   2858 		verbose(env, "verifier bug. No program starts at insn %d\n",
   2859 			target_insn + 1);
   2860 		return -EFAULT;
   2861 	}
   2862 
   2863 	caller = state->frame[state->curframe];
   2864 	if (state->frame[state->curframe + 1]) {
   2865 		verbose(env, "verifier bug. Frame %d already allocated\n",
   2866 			state->curframe + 1);
   2867 		return -EFAULT;
   2868 	}
   2869 
   2870 	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
   2871 	if (!callee)
   2872 		return -ENOMEM;
   2873 	state->frame[state->curframe + 1] = callee;
   2874 
   2875 	/* callee cannot access r0, r6 - r9 for reading and has to write
   2876 	 * into its own stack before reading from it.
   2877 	 * callee can read/write into caller's stack
   2878 	 */
   2879 	init_func_state(env, callee,
   2880 			/* remember the callsite, it will be used by bpf_exit */
   2881 			*insn_idx /* callsite */,
   2882 			state->curframe + 1 /* frameno within this callchain */,
   2883 			subprog /* subprog number within this prog */);
   2884 
   2885 	/* Transfer references to the callee */
   2886 	err = transfer_reference_state(callee, caller);
   2887 	if (err)
   2888 		return err;
   2889 
   2890 	/* copy r1 - r5 args that callee can access.  The copy includes parent
   2891 	 * pointers, which connects us up to the liveness chain
   2892 	 */
   2893 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
   2894 		callee->regs[i] = caller->regs[i];
   2895 
   2896 	/* after the call registers r0 - r5 were scratched */
   2897 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
   2898 		mark_reg_not_init(env, caller->regs, caller_saved[i]);
   2899 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
   2900 	}
   2901 
   2902 	/* only increment it after check_reg_arg() finished */
   2903 	state->curframe++;
   2904 
   2905 	/* and go analyze first insn of the callee */
   2906 	*insn_idx = target_insn;
   2907 
   2908 	if (env->log.level) {
   2909 		verbose(env, "caller:\n");
   2910 		print_verifier_state(env, caller);
   2911 		verbose(env, "callee:\n");
   2912 		print_verifier_state(env, callee);
   2913 	}
   2914 	return 0;
   2915 }
   2916 
   2917 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
   2918 {
   2919 	struct bpf_verifier_state *state = env->cur_state;
   2920 	struct bpf_func_state *caller, *callee;
   2921 	struct bpf_reg_state *r0;
   2922 	int err;
   2923 
   2924 	callee = state->frame[state->curframe];
   2925 	r0 = &callee->regs[BPF_REG_0];
   2926 	if (r0->type == PTR_TO_STACK) {
   2927 		/* technically it's ok to return caller's stack pointer
   2928 		 * (or caller's caller's pointer) back to the caller,
   2929 		 * since these pointers are valid. Only current stack
   2930 		 * pointer will be invalid as soon as function exits,
   2931 		 * but let's be conservative
   2932 		 */
   2933 		verbose(env, "cannot return stack pointer to the caller\n");
   2934 		return -EINVAL;
   2935 	}
   2936 
   2937 	state->curframe--;
   2938 	caller = state->frame[state->curframe];
   2939 	/* return to the caller whatever r0 had in the callee */
   2940 	caller->regs[BPF_REG_0] = *r0;
   2941 
   2942 	/* Transfer references to the caller */
   2943 	err = transfer_reference_state(caller, callee);
   2944 	if (err)
   2945 		return err;
   2946 
   2947 	*insn_idx = callee->callsite + 1;
   2948 	if (env->log.level) {
   2949 		verbose(env, "returning from callee:\n");
   2950 		print_verifier_state(env, callee);
   2951 		verbose(env, "to caller at %d:\n", *insn_idx);
   2952 		print_verifier_state(env, caller);
   2953 	}
   2954 	/* clear everything in the callee */
   2955 	free_func_state(callee);
   2956 	state->frame[state->curframe + 1] = NULL;
   2957 	return 0;
   2958 }
   2959 
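         /* bpf_get_stack() and bpf_probe_read_str() return at most the size of
          * the buffer passed to them, so r0's max bounds can be tightened to the
          * size argument's bounds remembered in meta during check_func_arg().
          */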
   2960 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
   2961 				   int func_id,
   2962 				   struct bpf_call_arg_meta *meta)
   2963 {
   2964 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
   2965 
   2966 	if (ret_type != RET_INTEGER ||
   2967 	    (func_id != BPF_FUNC_get_stack &&
   2968 	     func_id != BPF_FUNC_probe_read_str))
   2969 		return;
   2970 
   2971 	ret_reg->smax_value = meta->msize_smax_value;
   2972 	ret_reg->umax_value = meta->msize_umax_value;
   2973 	__reg_deduce_bounds(ret_reg);
   2974 	__reg_bound_offset(ret_reg);
   2975 }
   2976 
   2977 static int
   2978 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
   2979 		int func_id, int insn_idx)
   2980 {
   2981 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
   2982 
   2983 	if (func_id != BPF_FUNC_tail_call &&
   2984 	    func_id != BPF_FUNC_map_lookup_elem &&
   2985 	    func_id != BPF_FUNC_map_update_elem &&
   2986 	    func_id != BPF_FUNC_map_delete_elem &&
   2987 	    func_id != BPF_FUNC_map_push_elem &&
   2988 	    func_id != BPF_FUNC_map_pop_elem &&
   2989 	    func_id != BPF_FUNC_map_peek_elem)
   2990 		return 0;
   2991 
   2992 	if (meta->map_ptr == NULL) {
   2993 		verbose(env, "kernel subsystem misconfigured verifier\n");
   2994 		return -EINVAL;
   2995 	}
   2996 
   2997 	if (!BPF_MAP_PTR(aux->map_state))
   2998 		bpf_map_ptr_store(aux, meta->map_ptr,
   2999 				  meta->map_ptr->unpriv_array);
   3000 	else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
   3001 		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
   3002 				  meta->map_ptr->unpriv_array);
   3003 	return 0;
   3004 }
   3005 
   3006 static int check_reference_leak(struct bpf_verifier_env *env)
   3007 {
   3008 	struct bpf_func_state *state = cur_func(env);
   3009 	int i;
   3010 
   3011 	for (i = 0; i < state->acquired_refs; i++) {
   3012 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
   3013 			state->refs[i].id, state->refs[i].insn_idx);
   3014 	}
   3015 	return state->acquired_refs ? -EINVAL : 0;
   3016 }
   3017 
   3018 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
   3019 {
   3020 	const struct bpf_func_proto *fn = NULL;
   3021 	struct bpf_reg_state *regs;
   3022 	struct bpf_call_arg_meta meta;
   3023 	bool changes_data;
   3024 	int i, err;
   3025 
   3026 	/* find function prototype */
   3027 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
   3028 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
   3029 			func_id);
   3030 		return -EINVAL;
   3031 	}
   3032 
   3033 	if (env->ops->get_func_proto)
   3034 		fn = env->ops->get_func_proto(func_id, env->prog);
   3035 	if (!fn) {
   3036 		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
   3037 			func_id);
   3038 		return -EINVAL;
   3039 	}
   3040 
   3041 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
   3042 	if (!env->prog->gpl_compatible && fn->gpl_only) {
   3043 		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
   3044 		return -EINVAL;
   3045 	}
   3046 
   3047 	/* With LD_ABS/IND some JITs save/restore skb from r1. */
   3048 	changes_data = bpf_helper_changes_pkt_data(fn->func);
   3049 	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
   3050 		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
   3051 			func_id_name(func_id), func_id);
   3052 		return -EINVAL;
   3053 	}
   3054 
   3055 	memset(&meta, 0, sizeof(meta));
   3056 	meta.pkt_access = fn->pkt_access;
   3057 
   3058 	err = check_func_proto(fn, func_id);
   3059 	if (err) {
   3060 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
   3061 			func_id_name(func_id), func_id);
   3062 		return err;
   3063 	}
   3064 
   3065 	meta.func_id = func_id;
   3066 	/* check args */
   3067 	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
   3068 	if (err)
   3069 		return err;
   3070 	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
   3071 	if (err)
   3072 		return err;
   3073 	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
   3074 	if (err)
   3075 		return err;
   3076 	err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
   3077 	if (err)
   3078 		return err;
   3079 	err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
   3080 	if (err)
   3081 		return err;
   3082 
   3083 	err = record_func_map(env, &meta, func_id, insn_idx);
   3084 	if (err)
   3085 		return err;
   3086 
    3087 	/* Mark slots with STACK_MISC in case of raw mode; the stack
    3088 	 * offset is inferred from register state.
   3089 	 */
   3090 	for (i = 0; i < meta.access_size; i++) {
   3091 		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
   3092 				       BPF_WRITE, -1, false);
   3093 		if (err)
   3094 			return err;
   3095 	}
   3096 
   3097 	if (func_id == BPF_FUNC_tail_call) {
   3098 		err = check_reference_leak(env);
   3099 		if (err) {
   3100 			verbose(env, "tail_call would lead to reference leak\n");
   3101 			return err;
   3102 		}
   3103 	} else if (is_release_function(func_id)) {
   3104 		err = release_reference(env, meta.ref_obj_id);
   3105 		if (err) {
   3106 			verbose(env, "func %s#%d reference has not been acquired before\n",
   3107 				func_id_name(func_id), func_id);
   3108 			return err;
   3109 		}
   3110 	}
   3111 
   3112 	regs = cur_regs(env);
   3113 
    3114 	/* check that the flags argument in get_local_storage(map, flags) is 0;
    3115 	 * this is required because get_local_storage() can't return an error.
    3116 	 */
   3117 	if (func_id == BPF_FUNC_get_local_storage &&
   3118 	    !register_is_null(&regs[BPF_REG_2])) {
   3119 		verbose(env, "get_local_storage() doesn't support non-zero flags\n");
   3120 		return -EINVAL;
   3121 	}
   3122 
   3123 	/* reset caller saved regs */
   3124 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
   3125 		mark_reg_not_init(env, regs, caller_saved[i]);
   3126 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
   3127 	}
   3128 
   3129 	/* update return register (already marked as written above) */
   3130 	if (fn->ret_type == RET_INTEGER) {
   3131 		/* sets type to SCALAR_VALUE */
   3132 		mark_reg_unknown(env, regs, BPF_REG_0);
   3133 	} else if (fn->ret_type == RET_VOID) {
   3134 		regs[BPF_REG_0].type = NOT_INIT;
   3135 	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
   3136 		   fn->ret_type == RET_PTR_TO_MAP_VALUE) {
   3137 		/* There is no offset yet applied, variable or fixed */
   3138 		mark_reg_known_zero(env, regs, BPF_REG_0);
   3139 		/* remember map_ptr, so that check_map_access()
   3140 		 * can check 'value_size' boundary of memory access
   3141 		 * to map element returned from bpf_map_lookup_elem()
   3142 		 */
   3143 		if (meta.map_ptr == NULL) {
   3144 			verbose(env,
   3145 				"kernel subsystem misconfigured verifier\n");
   3146 			return -EINVAL;
   3147 		}
   3148 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
   3149 		if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
   3150 			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
   3151 			if (map_value_has_spin_lock(meta.map_ptr))
   3152 				regs[BPF_REG_0].id = ++env->id_gen;
   3153 		} else {
   3154 			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
   3155 			regs[BPF_REG_0].id = ++env->id_gen;
   3156 		}
   3157 	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
   3158 		mark_reg_known_zero(env, regs, BPF_REG_0);
   3159 		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
   3160 		if (is_acquire_function(func_id)) {
   3161 			int id = acquire_reference_state(env, insn_idx);
   3162 
   3163 			if (id < 0)
   3164 				return id;
   3165 			/* For mark_ptr_or_null_reg() */
   3166 			regs[BPF_REG_0].id = id;
   3167 			/* For release_reference() */
   3168 			regs[BPF_REG_0].ref_obj_id = id;
   3169 		} else {
   3170 			/* For mark_ptr_or_null_reg() */
   3171 			regs[BPF_REG_0].id = ++env->id_gen;
   3172 		}
   3173 	} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
   3174 		mark_reg_known_zero(env, regs, BPF_REG_0);
   3175 		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
   3176 		regs[BPF_REG_0].id = ++env->id_gen;
   3177 	} else {
   3178 		verbose(env, "unknown return type %d of func %s#%d\n",
   3179 			fn->ret_type, func_id_name(func_id), func_id);
   3180 		return -EINVAL;
   3181 	}
   3182 
   3183 	if (is_ptr_cast_function(func_id))
   3184 		/* For release_reference() */
   3185 		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
   3186 
   3187 	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
   3188 
   3189 	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
   3190 	if (err)
   3191 		return err;
   3192 
   3193 	if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
   3194 		const char *err_str;
   3195 
   3196 #ifdef CONFIG_PERF_EVENTS
   3197 		err = get_callchain_buffers(sysctl_perf_event_max_stack);
   3198 		err_str = "cannot get callchain buffer for func %s#%d\n";
   3199 #else
   3200 		err = -ENOTSUPP;
   3201 		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
   3202 #endif
   3203 		if (err) {
   3204 			verbose(env, err_str, func_id_name(func_id), func_id);
   3205 			return err;
   3206 		}
   3207 
   3208 		env->prog->has_callchain_buf = true;
   3209 	}
   3210 
   3211 	if (changes_data)
   3212 		clear_all_pkt_pointers(env);
   3213 	return 0;
   3214 }
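         /* Editorial example of the return-type handling above: after
          * "call bpf_map_lookup_elem" R0 becomes PTR_TO_MAP_VALUE_OR_NULL
          * with a fresh id (so a later "if r0 == 0" can null-mark every copy
          * of the pointer), while an acquire helper such as bpf_sk_lookup_tcp
          * leaves R0 as PTR_TO_SOCKET_OR_NULL and additionally records
          * ref_obj_id so check_reference_leak()/release_reference() can
          * track the acquired socket. In every case R1-R5 are clobbered.
          */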
   3215 
   3216 static bool signed_add_overflows(s64 a, s64 b)
   3217 {
   3218 	/* Do the add in u64, where overflow is well-defined */
   3219 	s64 res = (s64)((u64)a + (u64)b);
   3220 
   3221 	if (b < 0)
   3222 		return res > a;
   3223 	return res < a;
   3224 }
   3225 
   3226 static bool signed_sub_overflows(s64 a, s64 b)
   3227 {
   3228 	/* Do the sub in u64, where overflow is well-defined */
   3229 	s64 res = (s64)((u64)a - (u64)b);
   3230 
   3231 	if (b < 0)
   3232 		return res < a;
   3233 	return res > a;
   3234 }
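         /* Worked example (sketch): signed_add_overflows(S64_MAX, 1) wraps
          * to S64_MIN in the well-defined u64 addition; with b > 0 and
          * res < a it reports overflow. signed_sub_overflows(S64_MIN, 1)
          * wraps to S64_MAX and, with b > 0 and res > a, likewise reports
          * overflow. The callers below use this to widen smin/smax to
          * [S64_MIN, S64_MAX] whenever the bounds arithmetic may wrap.
          */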
   3235 
   3236 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
   3237 				  const struct bpf_reg_state *reg,
   3238 				  enum bpf_reg_type type)
   3239 {
   3240 	bool known = tnum_is_const(reg->var_off);
   3241 	s64 val = reg->var_off.value;
   3242 	s64 smin = reg->smin_value;
   3243 
   3244 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
   3245 		verbose(env, "math between %s pointer and %lld is not allowed\n",
   3246 			reg_type_str[type], val);
   3247 		return false;
   3248 	}
   3249 
   3250 	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
   3251 		verbose(env, "%s pointer offset %d is not allowed\n",
   3252 			reg_type_str[type], reg->off);
   3253 		return false;
   3254 	}
   3255 
   3256 	if (smin == S64_MIN) {
   3257 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
   3258 			reg_type_str[type]);
   3259 		return false;
   3260 	}
   3261 
   3262 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
   3263 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
   3264 			smin, reg_type_str[type]);
   3265 		return false;
   3266 	}
   3267 
   3268 	return true;
   3269 }
   3270 
   3271 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
   3272 {
   3273 	return &env->insn_aux_data[env->insn_idx];
   3274 }
   3275 
   3276 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
   3277 			      u32 *ptr_limit, u8 opcode, bool off_is_neg)
   3278 {
   3279 	bool mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
   3280 			    (opcode == BPF_SUB && !off_is_neg);
   3281 	u32 off;
   3282 
   3283 	switch (ptr_reg->type) {
   3284 	case PTR_TO_STACK:
   3285 		off = ptr_reg->off + ptr_reg->var_off.value;
   3286 		if (mask_to_left)
   3287 			*ptr_limit = MAX_BPF_STACK + off;
   3288 		else
   3289 			*ptr_limit = -off;
   3290 		return 0;
   3291 	case PTR_TO_MAP_VALUE:
   3292 		if (mask_to_left) {
   3293 			*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
   3294 		} else {
   3295 			off = ptr_reg->smin_value + ptr_reg->off;
   3296 			*ptr_limit = ptr_reg->map_ptr->value_size - off;
   3297 		}
   3298 		return 0;
   3299 	default:
   3300 		return -EINVAL;
   3301 	}
   3302 }
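         /* Worked example (sketch): for PTR_TO_STACK with a constant offset
          * of -16, adding a non-negative unknown scalar gives
          * mask_to_left == false and *ptr_limit = 16 (at most 16 bytes may
          * be added before leaving the already-used stack area), while
          * adding a negative scalar masks to the left with
          * *ptr_limit = MAX_BPF_STACK - 16.
          */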
   3303 
   3304 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
   3305 				    const struct bpf_insn *insn)
   3306 {
   3307 	return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
   3308 }
   3309 
   3310 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
   3311 				       u32 alu_state, u32 alu_limit)
   3312 {
   3313 	/* If we arrived here from different branches with different
   3314 	 * state or limits to sanitize, then this won't work.
   3315 	 */
   3316 	if (aux->alu_state &&
   3317 	    (aux->alu_state != alu_state ||
   3318 	     aux->alu_limit != alu_limit))
   3319 		return -EACCES;
   3320 
   3321 	/* Corresponding fixup done in fixup_bpf_calls(). */
   3322 	aux->alu_state = alu_state;
   3323 	aux->alu_limit = alu_limit;
   3324 	return 0;
   3325 }
   3326 
   3327 static int sanitize_val_alu(struct bpf_verifier_env *env,
   3328 			    struct bpf_insn *insn)
   3329 {
   3330 	struct bpf_insn_aux_data *aux = cur_aux(env);
   3331 
   3332 	if (can_skip_alu_sanitation(env, insn))
   3333 		return 0;
   3334 
   3335 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
   3336 }
   3337 
   3338 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
   3339 			    struct bpf_insn *insn,
   3340 			    const struct bpf_reg_state *ptr_reg,
   3341 			    struct bpf_reg_state *dst_reg,
   3342 			    bool off_is_neg)
   3343 {
   3344 	struct bpf_verifier_state *vstate = env->cur_state;
   3345 	struct bpf_insn_aux_data *aux = cur_aux(env);
   3346 	bool ptr_is_dst_reg = ptr_reg == dst_reg;
   3347 	u8 opcode = BPF_OP(insn->code);
   3348 	u32 alu_state, alu_limit;
   3349 	struct bpf_reg_state tmp;
   3350 	bool ret;
   3351 
   3352 	if (can_skip_alu_sanitation(env, insn))
   3353 		return 0;
   3354 
   3355 	/* We already marked aux for masking from non-speculative
   3356 	 * paths, thus we got here in the first place. We only care
   3357 	 * to explore bad access from here.
   3358 	 */
   3359 	if (vstate->speculative)
   3360 		goto do_sim;
   3361 
   3362 	alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
   3363 	alu_state |= ptr_is_dst_reg ?
   3364 		     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
   3365 
   3366 	if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
   3367 		return 0;
   3368 	if (update_alu_sanitation_state(aux, alu_state, alu_limit))
   3369 		return -EACCES;
   3370 do_sim:
   3371 	/* Simulate and find potential out-of-bounds access under
   3372 	 * speculative execution from truncation as a result of
   3373 	 * masking when off was not within expected range. If off
   3374 	 * sits in dst, then we temporarily need to move ptr there
   3375 	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
   3376 	 * for cases where we use K-based arithmetic in one direction
   3377 	 * and truncated reg-based in the other in order to explore
   3378 	 * bad access.
   3379 	 */
   3380 	if (!ptr_is_dst_reg) {
   3381 		tmp = *dst_reg;
   3382 		*dst_reg = *ptr_reg;
   3383 	}
   3384 	ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
   3385 	if (!ptr_is_dst_reg && ret)
   3386 		*dst_reg = tmp;
   3387 	return !ret ? -EFAULT : 0;
   3388 }
   3389 
   3390 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
   3391  * Caller should also handle BPF_MOV case separately.
   3392  * If we return -EACCES, caller may want to try again treating pointer as a
   3393  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
   3394  */
   3395 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
   3396 				   struct bpf_insn *insn,
   3397 				   const struct bpf_reg_state *ptr_reg,
   3398 				   const struct bpf_reg_state *off_reg)
   3399 {
   3400 	struct bpf_verifier_state *vstate = env->cur_state;
   3401 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   3402 	struct bpf_reg_state *regs = state->regs, *dst_reg;
   3403 	bool known = tnum_is_const(off_reg->var_off);
   3404 	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
   3405 	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
   3406 	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
   3407 	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
   3408 	u32 dst = insn->dst_reg, src = insn->src_reg;
   3409 	u8 opcode = BPF_OP(insn->code);
   3410 	int ret;
   3411 
   3412 	dst_reg = &regs[dst];
   3413 
   3414 	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
   3415 	    smin_val > smax_val || umin_val > umax_val) {
   3416 		/* Taint dst register if offset had invalid bounds derived from
   3417 		 * e.g. dead branches.
   3418 		 */
   3419 		__mark_reg_unknown(dst_reg);
   3420 		return 0;
   3421 	}
   3422 
   3423 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
   3424 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
   3425 		verbose(env,
   3426 			"R%d 32-bit pointer arithmetic prohibited\n",
   3427 			dst);
   3428 		return -EACCES;
   3429 	}
   3430 
   3431 	switch (ptr_reg->type) {
   3432 	case PTR_TO_MAP_VALUE_OR_NULL:
   3433 		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
   3434 			dst, reg_type_str[ptr_reg->type]);
   3435 		return -EACCES;
   3436 	case CONST_PTR_TO_MAP:
   3437 	case PTR_TO_PACKET_END:
   3438 	case PTR_TO_SOCKET:
   3439 	case PTR_TO_SOCKET_OR_NULL:
   3440 	case PTR_TO_SOCK_COMMON:
   3441 	case PTR_TO_SOCK_COMMON_OR_NULL:
   3442 	case PTR_TO_TCP_SOCK:
   3443 	case PTR_TO_TCP_SOCK_OR_NULL:
   3444 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
   3445 			dst, reg_type_str[ptr_reg->type]);
   3446 		return -EACCES;
   3447 	case PTR_TO_MAP_VALUE:
   3448 		if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
   3449 			verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
   3450 				off_reg == dst_reg ? dst : src);
   3451 			return -EACCES;
   3452 		}
   3453 		/* fall-through */
   3454 	default:
   3455 		break;
   3456 	}
   3457 
   3458 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
   3459 	 * The id may be overwritten later if we create a new variable offset.
   3460 	 */
   3461 	dst_reg->type = ptr_reg->type;
   3462 	dst_reg->id = ptr_reg->id;
   3463 
   3464 	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
   3465 	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
   3466 		return -EINVAL;
   3467 
   3468 	switch (opcode) {
   3469 	case BPF_ADD:
   3470 		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
   3471 		if (ret < 0) {
   3472 			verbose(env, "R%d tried to add from different maps or paths\n", dst);
   3473 			return ret;
   3474 		}
   3475 		/* We can take a fixed offset as long as it doesn't overflow
   3476 		 * the s32 'off' field
   3477 		 */
   3478 		if (known && (ptr_reg->off + smin_val ==
   3479 			      (s64)(s32)(ptr_reg->off + smin_val))) {
   3480 			/* pointer += K.  Accumulate it into fixed offset */
   3481 			dst_reg->smin_value = smin_ptr;
   3482 			dst_reg->smax_value = smax_ptr;
   3483 			dst_reg->umin_value = umin_ptr;
   3484 			dst_reg->umax_value = umax_ptr;
   3485 			dst_reg->var_off = ptr_reg->var_off;
   3486 			dst_reg->off = ptr_reg->off + smin_val;
   3487 			dst_reg->raw = ptr_reg->raw;
   3488 			break;
   3489 		}
    3490 		/* A new variable offset is created.  Note that off_reg->off
    3491 		 * == 0, since it's a scalar.
    3492 		 * dst_reg gets the pointer type and, since an unknown scalar
    3493 		 * value was added to the pointer, give it a new 'id' if it's
    3494 		 * a PTR_TO_PACKET.
    3495 		 * This creates a new 'base' pointer; off_reg (the variable
    3496 		 * part) gets added into the variable offset, and we copy the
    3497 		 * fixed offset from ptr_reg.
    3498 		 */
   3499 		if (signed_add_overflows(smin_ptr, smin_val) ||
   3500 		    signed_add_overflows(smax_ptr, smax_val)) {
   3501 			dst_reg->smin_value = S64_MIN;
   3502 			dst_reg->smax_value = S64_MAX;
   3503 		} else {
   3504 			dst_reg->smin_value = smin_ptr + smin_val;
   3505 			dst_reg->smax_value = smax_ptr + smax_val;
   3506 		}
   3507 		if (umin_ptr + umin_val < umin_ptr ||
   3508 		    umax_ptr + umax_val < umax_ptr) {
   3509 			dst_reg->umin_value = 0;
   3510 			dst_reg->umax_value = U64_MAX;
   3511 		} else {
   3512 			dst_reg->umin_value = umin_ptr + umin_val;
   3513 			dst_reg->umax_value = umax_ptr + umax_val;
   3514 		}
   3515 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
   3516 		dst_reg->off = ptr_reg->off;
   3517 		dst_reg->raw = ptr_reg->raw;
   3518 		if (reg_is_pkt_pointer(ptr_reg)) {
   3519 			dst_reg->id = ++env->id_gen;
   3520 			/* something was added to pkt_ptr, set range to zero */
   3521 			dst_reg->raw = 0;
   3522 		}
   3523 		break;
   3524 	case BPF_SUB:
   3525 		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
   3526 		if (ret < 0) {
   3527 			verbose(env, "R%d tried to sub from different maps or paths\n", dst);
   3528 			return ret;
   3529 		}
   3530 		if (dst_reg == off_reg) {
   3531 			/* scalar -= pointer.  Creates an unknown scalar */
   3532 			verbose(env, "R%d tried to subtract pointer from scalar\n",
   3533 				dst);
   3534 			return -EACCES;
   3535 		}
   3536 		/* We don't allow subtraction from FP, because (according to
    3537 		 * test_verifier.c test "invalid fp arithmetic") JITs might not
   3538 		 * be able to deal with it.
   3539 		 */
   3540 		if (ptr_reg->type == PTR_TO_STACK) {
   3541 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
   3542 				dst);
   3543 			return -EACCES;
   3544 		}
   3545 		if (known && (ptr_reg->off - smin_val ==
   3546 			      (s64)(s32)(ptr_reg->off - smin_val))) {
   3547 			/* pointer -= K.  Subtract it from fixed offset */
   3548 			dst_reg->smin_value = smin_ptr;
   3549 			dst_reg->smax_value = smax_ptr;
   3550 			dst_reg->umin_value = umin_ptr;
   3551 			dst_reg->umax_value = umax_ptr;
   3552 			dst_reg->var_off = ptr_reg->var_off;
   3553 			dst_reg->id = ptr_reg->id;
   3554 			dst_reg->off = ptr_reg->off - smin_val;
   3555 			dst_reg->raw = ptr_reg->raw;
   3556 			break;
   3557 		}
   3558 		/* A new variable offset is created.  If the subtrahend is known
   3559 		 * nonnegative, then any reg->range we had before is still good.
   3560 		 */
   3561 		if (signed_sub_overflows(smin_ptr, smax_val) ||
   3562 		    signed_sub_overflows(smax_ptr, smin_val)) {
   3563 			/* Overflow possible, we know nothing */
   3564 			dst_reg->smin_value = S64_MIN;
   3565 			dst_reg->smax_value = S64_MAX;
   3566 		} else {
   3567 			dst_reg->smin_value = smin_ptr - smax_val;
   3568 			dst_reg->smax_value = smax_ptr - smin_val;
   3569 		}
   3570 		if (umin_ptr < umax_val) {
   3571 			/* Overflow possible, we know nothing */
   3572 			dst_reg->umin_value = 0;
   3573 			dst_reg->umax_value = U64_MAX;
   3574 		} else {
   3575 			/* Cannot overflow (as long as bounds are consistent) */
   3576 			dst_reg->umin_value = umin_ptr - umax_val;
   3577 			dst_reg->umax_value = umax_ptr - umin_val;
   3578 		}
   3579 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
   3580 		dst_reg->off = ptr_reg->off;
   3581 		dst_reg->raw = ptr_reg->raw;
   3582 		if (reg_is_pkt_pointer(ptr_reg)) {
   3583 			dst_reg->id = ++env->id_gen;
    3584 			/* pkt_ptr may have moved forward (a negative value was subtracted), set range to zero */
   3585 			if (smin_val < 0)
   3586 				dst_reg->raw = 0;
   3587 		}
   3588 		break;
   3589 	case BPF_AND:
   3590 	case BPF_OR:
   3591 	case BPF_XOR:
   3592 		/* bitwise ops on pointers are troublesome, prohibit. */
   3593 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
   3594 			dst, bpf_alu_string[opcode >> 4]);
   3595 		return -EACCES;
   3596 	default:
   3597 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
   3598 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
   3599 			dst, bpf_alu_string[opcode >> 4]);
   3600 		return -EACCES;
   3601 	}
   3602 
   3603 	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
   3604 		return -EINVAL;
   3605 
   3606 	__update_reg_bounds(dst_reg);
   3607 	__reg_deduce_bounds(dst_reg);
   3608 	__reg_bound_offset(dst_reg);
   3609 
   3610 	/* For unprivileged we require that resulting offset must be in bounds
   3611 	 * in order to be able to sanitize access later on.
   3612 	 */
   3613 	if (!env->allow_ptr_leaks) {
   3614 		if (dst_reg->type == PTR_TO_MAP_VALUE &&
   3615 		    check_map_access(env, dst, dst_reg->off, 1, false)) {
   3616 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
   3617 				"prohibited for !root\n", dst);
   3618 			return -EACCES;
   3619 		} else if (dst_reg->type == PTR_TO_STACK &&
   3620 			   check_stack_access(env, dst_reg, dst_reg->off +
   3621 					      dst_reg->var_off.value, 1)) {
   3622 			verbose(env, "R%d stack pointer arithmetic goes out of range, "
   3623 				"prohibited for !root\n", dst);
   3624 			return -EACCES;
   3625 		}
   3626 	}
   3627 
   3628 	return 0;
   3629 }
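         /* Worked example (sketch): "r2 = map_value_ptr; r2 += r3" with r3 a
          * scalar bounded to [0, 16]: r2 keeps PTR_TO_MAP_VALUE and the
          * pointer's id, its fixed off is unchanged, smax/umax grow by 16
          * and var_off becomes the tnum sum, so a later load through r2 is
          * checked against value_size by check_map_access(). A 32-bit ALU
          * op or a bitwise op on the same pointer is rejected above.
          */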
   3630 
   3631 /* WARNING: This function does calculations on 64-bit values, but the actual
   3632  * execution may occur on 32-bit values. Therefore, things like bitshifts
   3633  * need extra checks in the 32-bit case.
   3634  */
   3635 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
   3636 				      struct bpf_insn *insn,
   3637 				      struct bpf_reg_state *dst_reg,
   3638 				      struct bpf_reg_state src_reg)
   3639 {
   3640 	struct bpf_reg_state *regs = cur_regs(env);
   3641 	u8 opcode = BPF_OP(insn->code);
   3642 	bool src_known, dst_known;
   3643 	s64 smin_val, smax_val;
   3644 	u64 umin_val, umax_val;
   3645 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
   3646 	u32 dst = insn->dst_reg;
   3647 	int ret;
   3648 
   3649 	if (insn_bitness == 32) {
   3650 		/* Relevant for 32-bit RSH: Information can propagate towards
   3651 		 * LSB, so it isn't sufficient to only truncate the output to
   3652 		 * 32 bits.
   3653 		 */
   3654 		coerce_reg_to_size(dst_reg, 4);
   3655 		coerce_reg_to_size(&src_reg, 4);
   3656 	}
   3657 
   3658 	smin_val = src_reg.smin_value;
   3659 	smax_val = src_reg.smax_value;
   3660 	umin_val = src_reg.umin_value;
   3661 	umax_val = src_reg.umax_value;
   3662 	src_known = tnum_is_const(src_reg.var_off);
   3663 	dst_known = tnum_is_const(dst_reg->var_off);
   3664 
   3665 	if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
   3666 	    smin_val > smax_val || umin_val > umax_val) {
   3667 		/* Taint dst register if offset had invalid bounds derived from
   3668 		 * e.g. dead branches.
   3669 		 */
   3670 		__mark_reg_unknown(dst_reg);
   3671 		return 0;
   3672 	}
   3673 
   3674 	if (!src_known &&
   3675 	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
   3676 		__mark_reg_unknown(dst_reg);
   3677 		return 0;
   3678 	}
   3679 
   3680 	switch (opcode) {
   3681 	case BPF_ADD:
   3682 		ret = sanitize_val_alu(env, insn);
   3683 		if (ret < 0) {
   3684 			verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
   3685 			return ret;
   3686 		}
   3687 		if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
   3688 		    signed_add_overflows(dst_reg->smax_value, smax_val)) {
   3689 			dst_reg->smin_value = S64_MIN;
   3690 			dst_reg->smax_value = S64_MAX;
   3691 		} else {
   3692 			dst_reg->smin_value += smin_val;
   3693 			dst_reg->smax_value += smax_val;
   3694 		}
   3695 		if (dst_reg->umin_value + umin_val < umin_val ||
   3696 		    dst_reg->umax_value + umax_val < umax_val) {
   3697 			dst_reg->umin_value = 0;
   3698 			dst_reg->umax_value = U64_MAX;
   3699 		} else {
   3700 			dst_reg->umin_value += umin_val;
   3701 			dst_reg->umax_value += umax_val;
   3702 		}
   3703 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
   3704 		break;
   3705 	case BPF_SUB:
   3706 		ret = sanitize_val_alu(env, insn);
   3707 		if (ret < 0) {
   3708 			verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
   3709 			return ret;
   3710 		}
   3711 		if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
   3712 		    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
   3713 			/* Overflow possible, we know nothing */
   3714 			dst_reg->smin_value = S64_MIN;
   3715 			dst_reg->smax_value = S64_MAX;
   3716 		} else {
   3717 			dst_reg->smin_value -= smax_val;
   3718 			dst_reg->smax_value -= smin_val;
   3719 		}
   3720 		if (dst_reg->umin_value < umax_val) {
   3721 			/* Overflow possible, we know nothing */
   3722 			dst_reg->umin_value = 0;
   3723 			dst_reg->umax_value = U64_MAX;
   3724 		} else {
   3725 			/* Cannot overflow (as long as bounds are consistent) */
   3726 			dst_reg->umin_value -= umax_val;
   3727 			dst_reg->umax_value -= umin_val;
   3728 		}
   3729 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
   3730 		break;
   3731 	case BPF_MUL:
   3732 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
   3733 		if (smin_val < 0 || dst_reg->smin_value < 0) {
   3734 			/* Ain't nobody got time to multiply that sign */
   3735 			__mark_reg_unbounded(dst_reg);
   3736 			__update_reg_bounds(dst_reg);
   3737 			break;
   3738 		}
   3739 		/* Both values are positive, so we can work with unsigned and
   3740 		 * copy the result to signed (unless it exceeds S64_MAX).
   3741 		 */
   3742 		if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
   3743 			/* Potential overflow, we know nothing */
   3744 			__mark_reg_unbounded(dst_reg);
   3745 			/* (except what we can learn from the var_off) */
   3746 			__update_reg_bounds(dst_reg);
   3747 			break;
   3748 		}
   3749 		dst_reg->umin_value *= umin_val;
   3750 		dst_reg->umax_value *= umax_val;
   3751 		if (dst_reg->umax_value > S64_MAX) {
   3752 			/* Overflow possible, we know nothing */
   3753 			dst_reg->smin_value = S64_MIN;
   3754 			dst_reg->smax_value = S64_MAX;
   3755 		} else {
   3756 			dst_reg->smin_value = dst_reg->umin_value;
   3757 			dst_reg->smax_value = dst_reg->umax_value;
   3758 		}
   3759 		break;
   3760 	case BPF_AND:
   3761 		if (src_known && dst_known) {
   3762 			__mark_reg_known(dst_reg, dst_reg->var_off.value &
   3763 						  src_reg.var_off.value);
   3764 			break;
   3765 		}
   3766 		/* We get our minimum from the var_off, since that's inherently
   3767 		 * bitwise.  Our maximum is the minimum of the operands' maxima.
   3768 		 */
   3769 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
   3770 		dst_reg->umin_value = dst_reg->var_off.value;
   3771 		dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
   3772 		if (dst_reg->smin_value < 0 || smin_val < 0) {
   3773 			/* Lose signed bounds when ANDing negative numbers,
   3774 			 * ain't nobody got time for that.
   3775 			 */
   3776 			dst_reg->smin_value = S64_MIN;
   3777 			dst_reg->smax_value = S64_MAX;
   3778 		} else {
   3779 			/* ANDing two positives gives a positive, so safe to
   3780 			 * cast result into s64.
   3781 			 */
   3782 			dst_reg->smin_value = dst_reg->umin_value;
   3783 			dst_reg->smax_value = dst_reg->umax_value;
   3784 		}
   3785 		/* We may learn something more from the var_off */
   3786 		__update_reg_bounds(dst_reg);
   3787 		break;
   3788 	case BPF_OR:
   3789 		if (src_known && dst_known) {
   3790 			__mark_reg_known(dst_reg, dst_reg->var_off.value |
   3791 						  src_reg.var_off.value);
   3792 			break;
   3793 		}
   3794 		/* We get our maximum from the var_off, and our minimum is the
   3795 		 * maximum of the operands' minima
   3796 		 */
   3797 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
   3798 		dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
   3799 		dst_reg->umax_value = dst_reg->var_off.value |
   3800 				      dst_reg->var_off.mask;
   3801 		if (dst_reg->smin_value < 0 || smin_val < 0) {
   3802 			/* Lose signed bounds when ORing negative numbers,
   3803 			 * ain't nobody got time for that.
   3804 			 */
   3805 			dst_reg->smin_value = S64_MIN;
   3806 			dst_reg->smax_value = S64_MAX;
   3807 		} else {
   3808 			/* ORing two positives gives a positive, so safe to
   3809 			 * cast result into s64.
   3810 			 */
   3811 			dst_reg->smin_value = dst_reg->umin_value;
   3812 			dst_reg->smax_value = dst_reg->umax_value;
   3813 		}
   3814 		/* We may learn something more from the var_off */
   3815 		__update_reg_bounds(dst_reg);
   3816 		break;
   3817 	case BPF_LSH:
   3818 		if (umax_val >= insn_bitness) {
   3819 			/* Shifts greater than 31 or 63 are undefined.
   3820 			 * This includes shifts by a negative number.
   3821 			 */
   3822 			mark_reg_unknown(env, regs, insn->dst_reg);
   3823 			break;
   3824 		}
   3825 		/* We lose all sign bit information (except what we can pick
   3826 		 * up from var_off)
   3827 		 */
   3828 		dst_reg->smin_value = S64_MIN;
   3829 		dst_reg->smax_value = S64_MAX;
   3830 		/* If we might shift our top bit out, then we know nothing */
   3831 		if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
   3832 			dst_reg->umin_value = 0;
   3833 			dst_reg->umax_value = U64_MAX;
   3834 		} else {
   3835 			dst_reg->umin_value <<= umin_val;
   3836 			dst_reg->umax_value <<= umax_val;
   3837 		}
   3838 		dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
   3839 		/* We may learn something more from the var_off */
   3840 		__update_reg_bounds(dst_reg);
   3841 		break;
   3842 	case BPF_RSH:
   3843 		if (umax_val >= insn_bitness) {
   3844 			/* Shifts greater than 31 or 63 are undefined.
   3845 			 * This includes shifts by a negative number.
   3846 			 */
   3847 			mark_reg_unknown(env, regs, insn->dst_reg);
   3848 			break;
   3849 		}
   3850 		/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
   3851 		 * be negative, then either:
   3852 		 * 1) src_reg might be zero, so the sign bit of the result is
   3853 		 *    unknown, so we lose our signed bounds
   3854 		 * 2) it's known negative, thus the unsigned bounds capture the
   3855 		 *    signed bounds
   3856 		 * 3) the signed bounds cross zero, so they tell us nothing
   3857 		 *    about the result
   3858 		 * If the value in dst_reg is known nonnegative, then again the
    3859 		 * unsigned bounds capture the signed bounds.
   3860 		 * Thus, in all cases it suffices to blow away our signed bounds
   3861 		 * and rely on inferring new ones from the unsigned bounds and
   3862 		 * var_off of the result.
   3863 		 */
   3864 		dst_reg->smin_value = S64_MIN;
   3865 		dst_reg->smax_value = S64_MAX;
   3866 		dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
   3867 		dst_reg->umin_value >>= umax_val;
   3868 		dst_reg->umax_value >>= umin_val;
   3869 		/* We may learn something more from the var_off */
   3870 		__update_reg_bounds(dst_reg);
   3871 		break;
   3872 	case BPF_ARSH:
   3873 		if (umax_val >= insn_bitness) {
   3874 			/* Shifts greater than 31 or 63 are undefined.
   3875 			 * This includes shifts by a negative number.
   3876 			 */
   3877 			mark_reg_unknown(env, regs, insn->dst_reg);
   3878 			break;
   3879 		}
   3880 
   3881 		/* Upon reaching here, src_known is true and
   3882 		 * umax_val is equal to umin_val.
   3883 		 */
   3884 		dst_reg->smin_value >>= umin_val;
   3885 		dst_reg->smax_value >>= umin_val;
   3886 		dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
   3887 
   3888 		/* blow away the dst_reg umin_value/umax_value and rely on
   3889 		 * dst_reg var_off to refine the result.
   3890 		 */
   3891 		dst_reg->umin_value = 0;
   3892 		dst_reg->umax_value = U64_MAX;
   3893 		__update_reg_bounds(dst_reg);
   3894 		break;
   3895 	default:
   3896 		mark_reg_unknown(env, regs, insn->dst_reg);
   3897 		break;
   3898 	}
   3899 
   3900 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
   3901 		/* 32-bit ALU ops are (32,32)->32 */
   3902 		coerce_reg_to_size(dst_reg, 4);
   3903 	}
   3904 
   3905 	__reg_deduce_bounds(dst_reg);
   3906 	__reg_bound_offset(dst_reg);
   3907 	return 0;
   3908 }
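         /* Worked example (sketch): BPF_ADD of two scalars bounded to
          * [0, 10] and [0, 5] yields [0, 15] for both the signed and the
          * unsigned bounds, plus the tnum sum in var_off. BPF_RSH by a
          * constant 2 on an unsigned range [0, 15] yields [0, 3], blows
          * away the signed bounds and then re-derives them from the
          * unsigned bounds via __reg_deduce_bounds().
          */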
   3909 
   3910 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
   3911  * and var_off.
   3912  */
   3913 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
   3914 				   struct bpf_insn *insn)
   3915 {
   3916 	struct bpf_verifier_state *vstate = env->cur_state;
   3917 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   3918 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
   3919 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
   3920 	u8 opcode = BPF_OP(insn->code);
   3921 
   3922 	dst_reg = &regs[insn->dst_reg];
   3923 	src_reg = NULL;
   3924 	if (dst_reg->type != SCALAR_VALUE)
   3925 		ptr_reg = dst_reg;
   3926 	if (BPF_SRC(insn->code) == BPF_X) {
   3927 		src_reg = &regs[insn->src_reg];
   3928 		if (src_reg->type != SCALAR_VALUE) {
   3929 			if (dst_reg->type != SCALAR_VALUE) {
   3930 				/* Combining two pointers by any ALU op yields
   3931 				 * an arbitrary scalar. Disallow all math except
   3932 				 * pointer subtraction
   3933 				 */
   3934 				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
   3935 					mark_reg_unknown(env, regs, insn->dst_reg);
   3936 					return 0;
   3937 				}
   3938 				verbose(env, "R%d pointer %s pointer prohibited\n",
   3939 					insn->dst_reg,
   3940 					bpf_alu_string[opcode >> 4]);
   3941 				return -EACCES;
   3942 			} else {
   3943 				/* scalar += pointer
   3944 				 * This is legal, but we have to reverse our
   3945 				 * src/dest handling in computing the range
   3946 				 */
   3947 				return adjust_ptr_min_max_vals(env, insn,
   3948 							       src_reg, dst_reg);
   3949 			}
   3950 		} else if (ptr_reg) {
   3951 			/* pointer += scalar */
   3952 			return adjust_ptr_min_max_vals(env, insn,
   3953 						       dst_reg, src_reg);
   3954 		}
   3955 	} else {
   3956 		/* Pretend the src is a reg with a known value, since we only
   3957 		 * need to be able to read from this state.
   3958 		 */
   3959 		off_reg.type = SCALAR_VALUE;
   3960 		__mark_reg_known(&off_reg, insn->imm);
   3961 		src_reg = &off_reg;
   3962 		if (ptr_reg) /* pointer += K */
   3963 			return adjust_ptr_min_max_vals(env, insn,
   3964 						       ptr_reg, src_reg);
   3965 	}
   3966 
   3967 	/* Got here implies adding two SCALAR_VALUEs */
   3968 	if (WARN_ON_ONCE(ptr_reg)) {
   3969 		print_verifier_state(env, state);
   3970 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
   3971 		return -EINVAL;
   3972 	}
   3973 	if (WARN_ON(!src_reg)) {
   3974 		print_verifier_state(env, state);
   3975 		verbose(env, "verifier internal error: no src_reg\n");
   3976 		return -EINVAL;
   3977 	}
   3978 	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
   3979 }
   3980 
   3981 /* check validity of 32-bit and 64-bit arithmetic operations */
   3982 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
   3983 {
   3984 	struct bpf_reg_state *regs = cur_regs(env);
   3985 	u8 opcode = BPF_OP(insn->code);
   3986 	int err;
   3987 
   3988 	if (opcode == BPF_END || opcode == BPF_NEG) {
   3989 		if (opcode == BPF_NEG) {
   3990 			if (BPF_SRC(insn->code) != 0 ||
   3991 			    insn->src_reg != BPF_REG_0 ||
   3992 			    insn->off != 0 || insn->imm != 0) {
   3993 				verbose(env, "BPF_NEG uses reserved fields\n");
   3994 				return -EINVAL;
   3995 			}
   3996 		} else {
   3997 			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
   3998 			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
   3999 			    BPF_CLASS(insn->code) == BPF_ALU64) {
   4000 				verbose(env, "BPF_END uses reserved fields\n");
   4001 				return -EINVAL;
   4002 			}
   4003 		}
   4004 
   4005 		/* check src operand */
   4006 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
   4007 		if (err)
   4008 			return err;
   4009 
   4010 		if (is_pointer_value(env, insn->dst_reg)) {
   4011 			verbose(env, "R%d pointer arithmetic prohibited\n",
   4012 				insn->dst_reg);
   4013 			return -EACCES;
   4014 		}
   4015 
   4016 		/* check dest operand */
   4017 		err = check_reg_arg(env, insn->dst_reg, DST_OP);
   4018 		if (err)
   4019 			return err;
   4020 
   4021 	} else if (opcode == BPF_MOV) {
   4022 
   4023 		if (BPF_SRC(insn->code) == BPF_X) {
   4024 			if (insn->imm != 0 || insn->off != 0) {
   4025 				verbose(env, "BPF_MOV uses reserved fields\n");
   4026 				return -EINVAL;
   4027 			}
   4028 
   4029 			/* check src operand */
   4030 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
   4031 			if (err)
   4032 				return err;
   4033 		} else {
   4034 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
   4035 				verbose(env, "BPF_MOV uses reserved fields\n");
   4036 				return -EINVAL;
   4037 			}
   4038 		}
   4039 
   4040 		/* check dest operand, mark as required later */
   4041 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
   4042 		if (err)
   4043 			return err;
   4044 
   4045 		if (BPF_SRC(insn->code) == BPF_X) {
   4046 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
   4047 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
   4048 
   4049 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
   4050 				/* case: R1 = R2
   4051 				 * copy register state to dest reg
   4052 				 */
   4053 				*dst_reg = *src_reg;
   4054 				dst_reg->live |= REG_LIVE_WRITTEN;
   4055 			} else {
   4056 				/* R1 = (u32) R2 */
   4057 				if (is_pointer_value(env, insn->src_reg)) {
   4058 					verbose(env,
   4059 						"R%d partial copy of pointer\n",
   4060 						insn->src_reg);
   4061 					return -EACCES;
   4062 				} else if (src_reg->type == SCALAR_VALUE) {
   4063 					*dst_reg = *src_reg;
   4064 					dst_reg->live |= REG_LIVE_WRITTEN;
   4065 				} else {
   4066 					mark_reg_unknown(env, regs,
   4067 							 insn->dst_reg);
   4068 				}
   4069 				coerce_reg_to_size(dst_reg, 4);
   4070 			}
   4071 		} else {
   4072 			/* case: R = imm
   4073 			 * remember the value we stored into this reg
   4074 			 */
   4075 			/* clear any state __mark_reg_known doesn't set */
   4076 			mark_reg_unknown(env, regs, insn->dst_reg);
   4077 			regs[insn->dst_reg].type = SCALAR_VALUE;
   4078 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
   4079 				__mark_reg_known(regs + insn->dst_reg,
   4080 						 insn->imm);
   4081 			} else {
   4082 				__mark_reg_known(regs + insn->dst_reg,
   4083 						 (u32)insn->imm);
   4084 			}
   4085 		}
   4086 
   4087 	} else if (opcode > BPF_END) {
   4088 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
   4089 		return -EINVAL;
   4090 
   4091 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
   4092 
   4093 		if (BPF_SRC(insn->code) == BPF_X) {
   4094 			if (insn->imm != 0 || insn->off != 0) {
   4095 				verbose(env, "BPF_ALU uses reserved fields\n");
   4096 				return -EINVAL;
   4097 			}
   4098 			/* check src1 operand */
   4099 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
   4100 			if (err)
   4101 				return err;
   4102 		} else {
   4103 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
   4104 				verbose(env, "BPF_ALU uses reserved fields\n");
   4105 				return -EINVAL;
   4106 			}
   4107 		}
   4108 
   4109 		/* check src2 operand */
   4110 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
   4111 		if (err)
   4112 			return err;
   4113 
   4114 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
   4115 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
   4116 			verbose(env, "div by zero\n");
   4117 			return -EINVAL;
   4118 		}
   4119 
   4120 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
   4121 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
   4122 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
   4123 
   4124 			if (insn->imm < 0 || insn->imm >= size) {
   4125 				verbose(env, "invalid shift %d\n", insn->imm);
   4126 				return -EINVAL;
   4127 			}
   4128 		}
   4129 
   4130 		/* check dest operand */
   4131 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
   4132 		if (err)
   4133 			return err;
   4134 
   4135 		return adjust_reg_min_max_vals(env, insn);
   4136 	}
   4137 
   4138 	return 0;
   4139 }
   4140 
   4141 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
   4142 				   struct bpf_reg_state *dst_reg,
   4143 				   enum bpf_reg_type type,
   4144 				   bool range_right_open)
   4145 {
   4146 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   4147 	struct bpf_reg_state *regs = state->regs, *reg;
   4148 	u16 new_range;
   4149 	int i, j;
   4150 
   4151 	if (dst_reg->off < 0 ||
   4152 	    (dst_reg->off == 0 && range_right_open))
   4153 		/* This doesn't give us any range */
   4154 		return;
   4155 
   4156 	if (dst_reg->umax_value > MAX_PACKET_OFF ||
   4157 	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
   4158 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
   4159 		 * than pkt_end, but that's because it's also less than pkt.
   4160 		 */
   4161 		return;
   4162 
   4163 	new_range = dst_reg->off;
   4164 	if (range_right_open)
   4165 		new_range--;
   4166 
   4167 	/* Examples for register markings:
   4168 	 *
   4169 	 * pkt_data in dst register:
   4170 	 *
   4171 	 *   r2 = r3;
   4172 	 *   r2 += 8;
   4173 	 *   if (r2 > pkt_end) goto <handle exception>
   4174 	 *   <access okay>
   4175 	 *
   4176 	 *   r2 = r3;
   4177 	 *   r2 += 8;
   4178 	 *   if (r2 < pkt_end) goto <access okay>
   4179 	 *   <handle exception>
   4180 	 *
   4181 	 *   Where:
   4182 	 *     r2 == dst_reg, pkt_end == src_reg
   4183 	 *     r2=pkt(id=n,off=8,r=0)
   4184 	 *     r3=pkt(id=n,off=0,r=0)
   4185 	 *
   4186 	 * pkt_data in src register:
   4187 	 *
   4188 	 *   r2 = r3;
   4189 	 *   r2 += 8;
   4190 	 *   if (pkt_end >= r2) goto <access okay>
   4191 	 *   <handle exception>
   4192 	 *
   4193 	 *   r2 = r3;
   4194 	 *   r2 += 8;
   4195 	 *   if (pkt_end <= r2) goto <handle exception>
   4196 	 *   <access okay>
   4197 	 *
   4198 	 *   Where:
   4199 	 *     pkt_end == dst_reg, r2 == src_reg
   4200 	 *     r2=pkt(id=n,off=8,r=0)
   4201 	 *     r3=pkt(id=n,off=0,r=0)
   4202 	 *
   4203 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
   4204 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
   4205 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
   4206 	 * the check.
   4207 	 */
   4208 
   4209 	/* If our ids match, then we must have the same max_value.  And we
   4210 	 * don't care about the other reg's fixed offset, since if it's too big
   4211 	 * the range won't allow anything.
   4212 	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
   4213 	 */
   4214 	for (i = 0; i < MAX_BPF_REG; i++)
   4215 		if (regs[i].type == type && regs[i].id == dst_reg->id)
   4216 			/* keep the maximum range already checked */
   4217 			regs[i].range = max(regs[i].range, new_range);
   4218 
   4219 	for (j = 0; j <= vstate->curframe; j++) {
   4220 		state = vstate->frame[j];
   4221 		bpf_for_each_spilled_reg(i, state, reg) {
   4222 			if (!reg)
   4223 				continue;
   4224 			if (reg->type == type && reg->id == dst_reg->id)
   4225 				reg->range = max(reg->range, new_range);
   4226 		}
   4227 	}
   4228 }
   4229 
   4230 /* compute branch direction of the expression "if (reg opcode val) goto target;"
   4231  * and return:
   4232  *  1 - branch will be taken and "goto target" will be executed
   4233  *  0 - branch will not be taken and fall-through to next insn
    4234  * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's value range is [0,10]
   4235  */
   4236 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
   4237 			   bool is_jmp32)
   4238 {
   4239 	struct bpf_reg_state reg_lo;
   4240 	s64 sval;
   4241 
   4242 	if (__is_pointer_value(false, reg))
   4243 		return -1;
   4244 
   4245 	if (is_jmp32) {
   4246 		reg_lo = *reg;
   4247 		reg = &reg_lo;
   4248 		/* For JMP32, only low 32 bits are compared, coerce_reg_to_size
   4249 		 * could truncate high bits and update umin/umax according to
   4250 		 * information of low bits.
   4251 		 */
   4252 		coerce_reg_to_size(reg, 4);
   4253 		/* smin/smax need special handling. For example, after coerce,
   4254 		 * if smin_value is 0x00000000ffffffffLL, the value is -1 when
   4255 		 * used as operand to JMP32. It is a negative number from s32's
   4256 		 * point of view, while it is a positive number when seen as
   4257 		 * s64. The smin/smax are kept as s64, therefore, when used with
   4258 		 * JMP32, they need to be transformed into s32, then sign
   4259 		 * extended back to s64.
   4260 		 *
    4261 		 * Also, smin/smax were copied from umin/umax. If umin and umax
    4262 		 * have different sign bits, the min/max relationship no longer
    4263 		 * holds after casting into s32; in that case, set smin/smax to
    4264 		 * the safest (widest) s32 range.
   4265 		 */
   4266 		if ((reg->umax_value ^ reg->umin_value) &
   4267 		    (1ULL << 31)) {
   4268 			reg->smin_value = S32_MIN;
   4269 			reg->smax_value = S32_MAX;
   4270 		}
   4271 		reg->smin_value = (s64)(s32)reg->smin_value;
   4272 		reg->smax_value = (s64)(s32)reg->smax_value;
   4273 
   4274 		val = (u32)val;
   4275 		sval = (s64)(s32)val;
   4276 	} else {
   4277 		sval = (s64)val;
   4278 	}
   4279 
   4280 	switch (opcode) {
   4281 	case BPF_JEQ:
   4282 		if (tnum_is_const(reg->var_off))
   4283 			return !!tnum_equals_const(reg->var_off, val);
   4284 		break;
   4285 	case BPF_JNE:
   4286 		if (tnum_is_const(reg->var_off))
   4287 			return !tnum_equals_const(reg->var_off, val);
   4288 		break;
   4289 	case BPF_JSET:
   4290 		if ((~reg->var_off.mask & reg->var_off.value) & val)
   4291 			return 1;
   4292 		if (!((reg->var_off.mask | reg->var_off.value) & val))
   4293 			return 0;
   4294 		break;
   4295 	case BPF_JGT:
   4296 		if (reg->umin_value > val)
   4297 			return 1;
   4298 		else if (reg->umax_value <= val)
   4299 			return 0;
   4300 		break;
   4301 	case BPF_JSGT:
   4302 		if (reg->smin_value > sval)
   4303 			return 1;
   4304 		else if (reg->smax_value < sval)
   4305 			return 0;
   4306 		break;
   4307 	case BPF_JLT:
   4308 		if (reg->umax_value < val)
   4309 			return 1;
   4310 		else if (reg->umin_value >= val)
   4311 			return 0;
   4312 		break;
   4313 	case BPF_JSLT:
   4314 		if (reg->smax_value < sval)
   4315 			return 1;
   4316 		else if (reg->smin_value >= sval)
   4317 			return 0;
   4318 		break;
   4319 	case BPF_JGE:
   4320 		if (reg->umin_value >= val)
   4321 			return 1;
   4322 		else if (reg->umax_value < val)
   4323 			return 0;
   4324 		break;
   4325 	case BPF_JSGE:
   4326 		if (reg->smin_value >= sval)
   4327 			return 1;
   4328 		else if (reg->smax_value < sval)
   4329 			return 0;
   4330 		break;
   4331 	case BPF_JLE:
   4332 		if (reg->umax_value <= val)
   4333 			return 1;
   4334 		else if (reg->umin_value > val)
   4335 			return 0;
   4336 		break;
   4337 	case BPF_JSLE:
   4338 		if (reg->smax_value <= sval)
   4339 			return 1;
   4340 		else if (reg->smin_value > sval)
   4341 			return 0;
   4342 		break;
   4343 	}
   4344 
   4345 	return -1;
   4346 }
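         /* Example (sketch): with r1 known to lie in [6, 20] (umin=6), the
          * test "if r1 > 5 goto L" returns 1 (always taken) because
          * umin_value > val; with r1 in [0, 10] it returns -1 and both
          * branches must be explored. For JMP32 the register copy is first
          * coerced to its low 32 bits before the comparison.
          */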
   4347 
    4348 /* Generate the min value of the high 32 bits from TNUM info. */
   4349 static u64 gen_hi_min(struct tnum var)
   4350 {
   4351 	return var.value & ~0xffffffffULL;
   4352 }
   4353 
    4354 /* Generate the max value of the high 32 bits from TNUM info. */
   4355 static u64 gen_hi_max(struct tnum var)
   4356 {
   4357 	return (var.value | var.mask) & ~0xffffffffULL;
   4358 }
   4359 
    4360 /* Return true if VAL is compared with an s64 sign-extended from s32, and they
    4361  * have the same signedness.
   4362  */
   4363 static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg)
   4364 {
   4365 	return ((s32)sval >= 0 &&
   4366 		reg->smin_value >= 0 && reg->smax_value <= S32_MAX) ||
   4367 	       ((s32)sval < 0 &&
   4368 		reg->smax_value <= 0 && reg->smin_value >= S32_MIN);
   4369 }
   4370 
   4371 /* Adjusts the register min/max values in the case that the dst_reg is the
   4372  * variable register that we are working on, and src_reg is a constant or we're
   4373  * simply doing a BPF_K check.
   4374  * In JEQ/JNE cases we also adjust the var_off values.
   4375  */
   4376 static void reg_set_min_max(struct bpf_reg_state *true_reg,
   4377 			    struct bpf_reg_state *false_reg, u64 val,
   4378 			    u8 opcode, bool is_jmp32)
   4379 {
   4380 	s64 sval;
   4381 
   4382 	/* If the dst_reg is a pointer, we can't learn anything about its
   4383 	 * variable offset from the compare (unless src_reg were a pointer into
    4384 	 * the same object, but we don't bother with that).
   4385 	 * Since false_reg and true_reg have the same type by construction, we
   4386 	 * only need to check one of them for pointerness.
   4387 	 */
   4388 	if (__is_pointer_value(false, false_reg))
   4389 		return;
   4390 
   4391 	val = is_jmp32 ? (u32)val : val;
   4392 	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
   4393 
   4394 	switch (opcode) {
   4395 	case BPF_JEQ:
   4396 	case BPF_JNE:
   4397 	{
   4398 		struct bpf_reg_state *reg =
   4399 			opcode == BPF_JEQ ? true_reg : false_reg;
   4400 
   4401 		/* For BPF_JEQ, if this is false we know nothing Jon Snow, but
   4402 		 * if it is true we know the value for sure. Likewise for
   4403 		 * BPF_JNE.
   4404 		 */
   4405 		if (is_jmp32) {
   4406 			u64 old_v = reg->var_off.value;
   4407 			u64 hi_mask = ~0xffffffffULL;
   4408 
   4409 			reg->var_off.value = (old_v & hi_mask) | val;
   4410 			reg->var_off.mask &= hi_mask;
   4411 		} else {
   4412 			__mark_reg_known(reg, val);
   4413 		}
   4414 		break;
   4415 	}
   4416 	case BPF_JSET:
   4417 		false_reg->var_off = tnum_and(false_reg->var_off,
   4418 					      tnum_const(~val));
   4419 		if (is_power_of_2(val))
   4420 			true_reg->var_off = tnum_or(true_reg->var_off,
   4421 						    tnum_const(val));
   4422 		break;
   4423 	case BPF_JGE:
   4424 	case BPF_JGT:
   4425 	{
   4426 		u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
   4427 		u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
   4428 
   4429 		if (is_jmp32) {
   4430 			false_umax += gen_hi_max(false_reg->var_off);
   4431 			true_umin += gen_hi_min(true_reg->var_off);
   4432 		}
   4433 		false_reg->umax_value = min(false_reg->umax_value, false_umax);
   4434 		true_reg->umin_value = max(true_reg->umin_value, true_umin);
   4435 		break;
   4436 	}
   4437 	case BPF_JSGE:
   4438 	case BPF_JSGT:
   4439 	{
   4440 		s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
   4441 		s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
   4442 
   4443 		/* If the full s64 was not sign-extended from s32 then don't
    4444 		 * deduce further info.
   4445 		 */
   4446 		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
   4447 			break;
   4448 		false_reg->smax_value = min(false_reg->smax_value, false_smax);
   4449 		true_reg->smin_value = max(true_reg->smin_value, true_smin);
   4450 		break;
   4451 	}
   4452 	case BPF_JLE:
   4453 	case BPF_JLT:
   4454 	{
   4455 		u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
   4456 		u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
   4457 
   4458 		if (is_jmp32) {
   4459 			false_umin += gen_hi_min(false_reg->var_off);
   4460 			true_umax += gen_hi_max(true_reg->var_off);
   4461 		}
   4462 		false_reg->umin_value = max(false_reg->umin_value, false_umin);
   4463 		true_reg->umax_value = min(true_reg->umax_value, true_umax);
   4464 		break;
   4465 	}
   4466 	case BPF_JSLE:
   4467 	case BPF_JSLT:
   4468 	{
   4469 		s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
   4470 		s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
   4471 
   4472 		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
   4473 			break;
   4474 		false_reg->smin_value = max(false_reg->smin_value, false_smin);
   4475 		true_reg->smax_value = min(true_reg->smax_value, true_smax);
   4476 		break;
   4477 	}
   4478 	default:
   4479 		break;
   4480 	}
   4481 
   4482 	__reg_deduce_bounds(false_reg);
   4483 	__reg_deduce_bounds(true_reg);
   4484 	/* We might have learned some bits from the bounds. */
   4485 	__reg_bound_offset(false_reg);
   4486 	__reg_bound_offset(true_reg);
   4487 	/* Intersecting with the old var_off might have improved our bounds
   4488 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
   4489 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
   4490 	 */
   4491 	__update_reg_bounds(false_reg);
   4492 	__update_reg_bounds(true_reg);
   4493 }
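         /* Worked example (sketch): for "if r1 > 10 goto L" with r1 a fully
          * unknown scalar, the true branch learns umin_value = 11 and the
          * false branch learns umax_value = 10, while a JEQ against a
          * constant marks the true branch's register fully known via
          * __mark_reg_known(). The deduce/bound-offset/update calls then
          * propagate what was learned into the signed bounds and var_off.
          */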
   4494 
   4495 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
   4496  * the variable reg.
   4497  */
   4498 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
   4499 				struct bpf_reg_state *false_reg, u64 val,
   4500 				u8 opcode, bool is_jmp32)
   4501 {
   4502 	s64 sval;
   4503 
   4504 	if (__is_pointer_value(false, false_reg))
   4505 		return;
   4506 
   4507 	val = is_jmp32 ? (u32)val : val;
   4508 	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
   4509 
   4510 	switch (opcode) {
   4511 	case BPF_JEQ:
   4512 	case BPF_JNE:
   4513 	{
   4514 		struct bpf_reg_state *reg =
   4515 			opcode == BPF_JEQ ? true_reg : false_reg;
   4516 
   4517 		if (is_jmp32) {
   4518 			u64 old_v = reg->var_off.value;
   4519 			u64 hi_mask = ~0xffffffffULL;
   4520 
   4521 			reg->var_off.value = (old_v & hi_mask) | val;
   4522 			reg->var_off.mask &= hi_mask;
   4523 		} else {
   4524 			__mark_reg_known(reg, val);
   4525 		}
   4526 		break;
   4527 	}
   4528 	case BPF_JSET:
   4529 		false_reg->var_off = tnum_and(false_reg->var_off,
   4530 					      tnum_const(~val));
   4531 		if (is_power_of_2(val))
   4532 			true_reg->var_off = tnum_or(true_reg->var_off,
   4533 						    tnum_const(val));
   4534 		break;
   4535 	case BPF_JGE:
   4536 	case BPF_JGT:
   4537 	{
   4538 		u64 false_umin = opcode == BPF_JGT ? val    : val + 1;
   4539 		u64 true_umax = opcode == BPF_JGT ? val - 1 : val;
   4540 
   4541 		if (is_jmp32) {
   4542 			false_umin += gen_hi_min(false_reg->var_off);
   4543 			true_umax += gen_hi_max(true_reg->var_off);
   4544 		}
   4545 		false_reg->umin_value = max(false_reg->umin_value, false_umin);
   4546 		true_reg->umax_value = min(true_reg->umax_value, true_umax);
   4547 		break;
   4548 	}
   4549 	case BPF_JSGE:
   4550 	case BPF_JSGT:
   4551 	{
   4552 		s64 false_smin = opcode == BPF_JSGT ? sval    : sval + 1;
   4553 		s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval;
   4554 
   4555 		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
   4556 			break;
   4557 		false_reg->smin_value = max(false_reg->smin_value, false_smin);
   4558 		true_reg->smax_value = min(true_reg->smax_value, true_smax);
   4559 		break;
   4560 	}
   4561 	case BPF_JLE:
   4562 	case BPF_JLT:
   4563 	{
   4564 		u64 false_umax = opcode == BPF_JLT ? val    : val - 1;
   4565 		u64 true_umin = opcode == BPF_JLT ? val + 1 : val;
   4566 
   4567 		if (is_jmp32) {
   4568 			false_umax += gen_hi_max(false_reg->var_off);
   4569 			true_umin += gen_hi_min(true_reg->var_off);
   4570 		}
   4571 		false_reg->umax_value = min(false_reg->umax_value, false_umax);
   4572 		true_reg->umin_value = max(true_reg->umin_value, true_umin);
   4573 		break;
   4574 	}
   4575 	case BPF_JSLE:
   4576 	case BPF_JSLT:
   4577 	{
   4578 		s64 false_smax = opcode == BPF_JSLT ? sval    : sval - 1;
   4579 		s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval;
   4580 
   4581 		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
   4582 			break;
   4583 		false_reg->smax_value = min(false_reg->smax_value, false_smax);
   4584 		true_reg->smin_value = max(true_reg->smin_value, true_smin);
   4585 		break;
   4586 	}
   4587 	default:
   4588 		break;
   4589 	}
   4590 
   4591 	__reg_deduce_bounds(false_reg);
   4592 	__reg_deduce_bounds(true_reg);
   4593 	/* We might have learned some bits from the bounds. */
   4594 	__reg_bound_offset(false_reg);
   4595 	__reg_bound_offset(true_reg);
   4596 	/* Intersecting with the old var_off might have improved our bounds
   4597 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
   4598 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
   4599 	 */
   4600 	__update_reg_bounds(false_reg);
   4601 	__update_reg_bounds(true_reg);
   4602 }
   4603 
   4604 /* Regs are known to be equal, so intersect their min/max/var_off */
   4605 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
   4606 				  struct bpf_reg_state *dst_reg)
   4607 {
   4608 	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
   4609 							dst_reg->umin_value);
   4610 	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
   4611 							dst_reg->umax_value);
   4612 	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
   4613 							dst_reg->smin_value);
   4614 	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
   4615 							dst_reg->smax_value);
   4616 	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
   4617 							     dst_reg->var_off);
   4618 	/* We might have learned new bounds from the var_off. */
   4619 	__update_reg_bounds(src_reg);
   4620 	__update_reg_bounds(dst_reg);
   4621 	/* We might have learned something about the sign bit. */
   4622 	__reg_deduce_bounds(src_reg);
   4623 	__reg_deduce_bounds(dst_reg);
   4624 	/* We might have learned some bits from the bounds. */
   4625 	__reg_bound_offset(src_reg);
   4626 	__reg_bound_offset(dst_reg);
   4627 	/* Intersecting with the old var_off might have improved our bounds
   4628 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
   4629 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
   4630 	 */
   4631 	__update_reg_bounds(src_reg);
   4632 	__update_reg_bounds(dst_reg);
   4633 }
   4634 
   4635 static void reg_combine_min_max(struct bpf_reg_state *true_src,
   4636 				struct bpf_reg_state *true_dst,
   4637 				struct bpf_reg_state *false_src,
   4638 				struct bpf_reg_state *false_dst,
   4639 				u8 opcode)
   4640 {
   4641 	switch (opcode) {
   4642 	case BPF_JEQ:
   4643 		__reg_combine_min_max(true_src, true_dst);
   4644 		break;
   4645 	case BPF_JNE:
   4646 		__reg_combine_min_max(false_src, false_dst);
   4647 		break;
   4648 	}
   4649 }
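
        /* Example (values are illustrative): for "if r1 == r2 goto ..." with
         * r1 in [0, 10] and r2 in [5, 20], both SCALAR_VALUE, the taken branch
         * intersects the two ranges, so both registers end up with
         * umin_value == 5 and umax_value == 10 and their var_offs intersected.
         * For BPF_JNE the same combination is applied to the fall-through
         * branch instead, where the registers are known to be equal.
         */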
   4650 
   4651 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
   4652 				 struct bpf_reg_state *reg, u32 id,
   4653 				 bool is_null)
   4654 {
   4655 	if (reg_type_may_be_null(reg->type) && reg->id == id) {
   4656 		/* Old offset (both fixed and variable parts) should
   4657 		 * have been known-zero, because we don't allow pointer
   4658 		 * arithmetic on pointers that might be NULL.
   4659 		 */
   4660 		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
   4661 				 !tnum_equals_const(reg->var_off, 0) ||
   4662 				 reg->off)) {
   4663 			__mark_reg_known_zero(reg);
   4664 			reg->off = 0;
   4665 		}
   4666 		if (is_null) {
   4667 			reg->type = SCALAR_VALUE;
   4668 		} else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
   4669 			if (reg->map_ptr->inner_map_meta) {
   4670 				reg->type = CONST_PTR_TO_MAP;
   4671 				reg->map_ptr = reg->map_ptr->inner_map_meta;
   4672 			} else {
   4673 				reg->type = PTR_TO_MAP_VALUE;
   4674 			}
   4675 		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
   4676 			reg->type = PTR_TO_SOCKET;
   4677 		} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
   4678 			reg->type = PTR_TO_SOCK_COMMON;
   4679 		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
   4680 			reg->type = PTR_TO_TCP_SOCK;
   4681 		}
   4682 		if (is_null) {
   4683 			/* We no longer need id and ref_obj_id from this point
   4684 			 * onwards, so reset them to give state pruning a chance
   4685 			 * to take effect.
   4686 			 */
   4687 			reg->id = 0;
   4688 			reg->ref_obj_id = 0;
   4689 		} else if (!reg_may_point_to_spin_lock(reg)) {
   4690 			/* For not-NULL ptr, reg->ref_obj_id will be reset
   4691 			 * in release_reg_references().
   4692 			 *
   4693 			 * reg->id is still used by spin_lock ptr. Other
   4694 			 * than spin_lock ptr type, reg->id can be reset.
   4695 			 */
   4696 			reg->id = 0;
   4697 		}
   4698 	}
   4699 }
   4700 
   4701 /* The logic is similar to find_good_pkt_pointers(), both could eventually
   4702  * be folded together at some point.
   4703  */
   4704 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
   4705 				  bool is_null)
   4706 {
   4707 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   4708 	struct bpf_reg_state *reg, *regs = state->regs;
   4709 	u32 ref_obj_id = regs[regno].ref_obj_id;
   4710 	u32 id = regs[regno].id;
   4711 	int i, j;
   4712 
   4713 	if (ref_obj_id && ref_obj_id == id && is_null)
   4714 		/* regs[regno] is in the " == NULL" branch.
   4715 		 * No one could have freed the reference state before
   4716 		 * doing the NULL check.
   4717 		 */
   4718 		WARN_ON_ONCE(release_reference_state(state, id));
   4719 
   4720 	for (i = 0; i < MAX_BPF_REG; i++)
   4721 		mark_ptr_or_null_reg(state, &regs[i], id, is_null);
   4722 
   4723 	for (j = 0; j <= vstate->curframe; j++) {
   4724 		state = vstate->frame[j];
   4725 		bpf_for_each_spilled_reg(i, state, reg) {
   4726 			if (!reg)
   4727 				continue;
   4728 			mark_ptr_or_null_reg(state, reg, id, is_null);
   4729 		}
   4730 	}
   4731 }
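
        /* Typical pattern handled here (register numbers are just an example):
         *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
         *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
         * After this NULL check, R0 (and every register or stack slot carrying
         * the same id) is PTR_TO_MAP_VALUE in the fall-through branch and a
         * plain SCALAR_VALUE in the taken (== NULL) branch.
         */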
   4732 
   4733 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
   4734 				   struct bpf_reg_state *dst_reg,
   4735 				   struct bpf_reg_state *src_reg,
   4736 				   struct bpf_verifier_state *this_branch,
   4737 				   struct bpf_verifier_state *other_branch)
   4738 {
   4739 	if (BPF_SRC(insn->code) != BPF_X)
   4740 		return false;
   4741 
   4742 	/* Pointers are always 64-bit. */
   4743 	if (BPF_CLASS(insn->code) == BPF_JMP32)
   4744 		return false;
   4745 
   4746 	switch (BPF_OP(insn->code)) {
   4747 	case BPF_JGT:
   4748 		if ((dst_reg->type == PTR_TO_PACKET &&
   4749 		     src_reg->type == PTR_TO_PACKET_END) ||
   4750 		    (dst_reg->type == PTR_TO_PACKET_META &&
   4751 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
   4752 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
   4753 			find_good_pkt_pointers(this_branch, dst_reg,
   4754 					       dst_reg->type, false);
   4755 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
   4756 			    src_reg->type == PTR_TO_PACKET) ||
   4757 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
   4758 			    src_reg->type == PTR_TO_PACKET_META)) {
   4759 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
   4760 			find_good_pkt_pointers(other_branch, src_reg,
   4761 					       src_reg->type, true);
   4762 		} else {
   4763 			return false;
   4764 		}
   4765 		break;
   4766 	case BPF_JLT:
   4767 		if ((dst_reg->type == PTR_TO_PACKET &&
   4768 		     src_reg->type == PTR_TO_PACKET_END) ||
   4769 		    (dst_reg->type == PTR_TO_PACKET_META &&
   4770 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
   4771 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
   4772 			find_good_pkt_pointers(other_branch, dst_reg,
   4773 					       dst_reg->type, true);
   4774 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
   4775 			    src_reg->type == PTR_TO_PACKET) ||
   4776 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
   4777 			    src_reg->type == PTR_TO_PACKET_META)) {
   4778 			/* pkt_end < pkt_data', pkt_data < pkt_meta' */
   4779 			find_good_pkt_pointers(this_branch, src_reg,
   4780 					       src_reg->type, false);
   4781 		} else {
   4782 			return false;
   4783 		}
   4784 		break;
   4785 	case BPF_JGE:
   4786 		if ((dst_reg->type == PTR_TO_PACKET &&
   4787 		     src_reg->type == PTR_TO_PACKET_END) ||
   4788 		    (dst_reg->type == PTR_TO_PACKET_META &&
   4789 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
   4790 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
   4791 			find_good_pkt_pointers(this_branch, dst_reg,
   4792 					       dst_reg->type, true);
   4793 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
   4794 			    src_reg->type == PTR_TO_PACKET) ||
   4795 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
   4796 			    src_reg->type == PTR_TO_PACKET_META)) {
   4797 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
   4798 			find_good_pkt_pointers(other_branch, src_reg,
   4799 					       src_reg->type, false);
   4800 		} else {
   4801 			return false;
   4802 		}
   4803 		break;
   4804 	case BPF_JLE:
   4805 		if ((dst_reg->type == PTR_TO_PACKET &&
   4806 		     src_reg->type == PTR_TO_PACKET_END) ||
   4807 		    (dst_reg->type == PTR_TO_PACKET_META &&
   4808 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
   4809 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
   4810 			find_good_pkt_pointers(other_branch, dst_reg,
   4811 					       dst_reg->type, false);
   4812 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
   4813 			    src_reg->type == PTR_TO_PACKET) ||
   4814 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
   4815 			    src_reg->type == PTR_TO_PACKET_META)) {
   4816 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
   4817 			find_good_pkt_pointers(this_branch, src_reg,
   4818 					       src_reg->type, true);
   4819 		} else {
   4820 			return false;
   4821 		}
   4822 		break;
   4823 	default:
   4824 		return false;
   4825 	}
   4826 
   4827 	return true;
   4828 }
   4829 
   4830 static int check_cond_jmp_op(struct bpf_verifier_env *env,
   4831 			     struct bpf_insn *insn, int *insn_idx)
   4832 {
   4833 	struct bpf_verifier_state *this_branch = env->cur_state;
   4834 	struct bpf_verifier_state *other_branch;
   4835 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
   4836 	struct bpf_reg_state *dst_reg, *other_branch_regs;
   4837 	u8 opcode = BPF_OP(insn->code);
   4838 	bool is_jmp32;
   4839 	int err;
   4840 
   4841 	/* Only conditional jumps are expected to reach here. */
   4842 	if (opcode == BPF_JA || opcode > BPF_JSLE) {
   4843 		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
   4844 		return -EINVAL;
   4845 	}
   4846 
   4847 	if (BPF_SRC(insn->code) == BPF_X) {
   4848 		if (insn->imm != 0) {
   4849 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
   4850 			return -EINVAL;
   4851 		}
   4852 
   4853 		/* check src1 operand */
   4854 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
   4855 		if (err)
   4856 			return err;
   4857 
   4858 		if (is_pointer_value(env, insn->src_reg)) {
   4859 			verbose(env, "R%d pointer comparison prohibited\n",
   4860 				insn->src_reg);
   4861 			return -EACCES;
   4862 		}
   4863 	} else {
   4864 		if (insn->src_reg != BPF_REG_0) {
   4865 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
   4866 			return -EINVAL;
   4867 		}
   4868 	}
   4869 
   4870 	/* check src2 operand */
   4871 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
   4872 	if (err)
   4873 		return err;
   4874 
   4875 	dst_reg = &regs[insn->dst_reg];
   4876 	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
   4877 
   4878 	if (BPF_SRC(insn->code) == BPF_K) {
   4879 		int pred = is_branch_taken(dst_reg, insn->imm, opcode,
   4880 					   is_jmp32);
   4881 
   4882 		if (pred == 1) {
   4883 			 /* only follow the goto, ignore fall-through */
   4884 			*insn_idx += insn->off;
   4885 			return 0;
   4886 		} else if (pred == 0) {
   4887 			/* only follow fall-through branch, since
   4888 			 * that's where the program will go
   4889 			 */
   4890 			return 0;
   4891 		}
   4892 	}
   4893 
   4894 	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
   4895 				  false);
   4896 	if (!other_branch)
   4897 		return -EFAULT;
   4898 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
   4899 
   4900 	/* detect if we are comparing against a constant value so we can adjust
   4901 	 * our min/max values for our dst register.
   4902 	 * this is only legit if both are scalars (or pointers to the same
   4903 	 * object, I suppose, but we don't support that right now), because
   4904 	 * otherwise the different base pointers mean the offsets aren't
   4905 	 * comparable.
   4906 	 */
   4907 	if (BPF_SRC(insn->code) == BPF_X) {
   4908 		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
   4909 		struct bpf_reg_state lo_reg0 = *dst_reg;
   4910 		struct bpf_reg_state lo_reg1 = *src_reg;
   4911 		struct bpf_reg_state *src_lo, *dst_lo;
   4912 
   4913 		dst_lo = &lo_reg0;
   4914 		src_lo = &lo_reg1;
   4915 		coerce_reg_to_size(dst_lo, 4);
   4916 		coerce_reg_to_size(src_lo, 4);
   4917 
   4918 		if (dst_reg->type == SCALAR_VALUE &&
   4919 		    src_reg->type == SCALAR_VALUE) {
   4920 			if (tnum_is_const(src_reg->var_off) ||
   4921 			    (is_jmp32 && tnum_is_const(src_lo->var_off)))
   4922 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
   4923 						dst_reg,
   4924 						is_jmp32
   4925 						? src_lo->var_off.value
   4926 						: src_reg->var_off.value,
   4927 						opcode, is_jmp32);
   4928 			else if (tnum_is_const(dst_reg->var_off) ||
   4929 				 (is_jmp32 && tnum_is_const(dst_lo->var_off)))
   4930 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
   4931 						    src_reg,
   4932 						    is_jmp32
   4933 						    ? dst_lo->var_off.value
   4934 						    : dst_reg->var_off.value,
   4935 						    opcode, is_jmp32);
   4936 			else if (!is_jmp32 &&
   4937 				 (opcode == BPF_JEQ || opcode == BPF_JNE))
   4938 				/* Comparing for equality, we can combine knowledge */
   4939 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
   4940 						    &other_branch_regs[insn->dst_reg],
   4941 						    src_reg, dst_reg, opcode);
   4942 		}
   4943 	} else if (dst_reg->type == SCALAR_VALUE) {
   4944 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
   4945 					dst_reg, insn->imm, opcode, is_jmp32);
   4946 	}
   4947 
   4948 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
   4949 	 * NOTE: the optimizations below relate to pointer comparisons,
   4950 	 *       which will never be JMP32.
   4951 	 */
   4952 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
   4953 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
   4954 	    reg_type_may_be_null(dst_reg->type)) {
   4955 		/* Mark all identical registers in each branch as either
   4956 		 * safe or unknown, depending on the R == 0 or R != 0 conditional.
   4957 		 */
   4958 		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
   4959 				      opcode == BPF_JNE);
   4960 		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
   4961 				      opcode == BPF_JEQ);
   4962 	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
   4963 					   this_branch, other_branch) &&
   4964 		   is_pointer_value(env, insn->dst_reg)) {
   4965 		verbose(env, "R%d pointer comparison prohibited\n",
   4966 			insn->dst_reg);
   4967 		return -EACCES;
   4968 	}
   4969 	if (env->log.level)
   4970 		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
   4971 	return 0;
   4972 }
   4973 
   4974 /* return the map pointer stored inside BPF_LD_IMM64 instruction */
   4975 static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
   4976 {
   4977 	u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
   4978 
   4979 	return (struct bpf_map *) (unsigned long) imm64;
   4980 }
   4981 
   4982 /* verify BPF_LD_IMM64 instruction */
   4983 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
   4984 {
   4985 	struct bpf_reg_state *regs = cur_regs(env);
   4986 	int err;
   4987 
   4988 	if (BPF_SIZE(insn->code) != BPF_DW) {
   4989 		verbose(env, "invalid BPF_LD_IMM insn\n");
   4990 		return -EINVAL;
   4991 	}
   4992 	if (insn->off != 0) {
   4993 		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
   4994 		return -EINVAL;
   4995 	}
   4996 
   4997 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
   4998 	if (err)
   4999 		return err;
   5000 
   5001 	if (insn->src_reg == 0) {
   5002 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
   5003 
   5004 		regs[insn->dst_reg].type = SCALAR_VALUE;
   5005 		__mark_reg_known(&regs[insn->dst_reg], imm);
   5006 		return 0;
   5007 	}
   5008 
   5009 	/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
   5010 	BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
   5011 
   5012 	regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
   5013 	regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
   5014 	return 0;
   5015 }
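
        /* The two-insn BPF_LD_IMM64 encoding splits the 64-bit constant as
         * used above: insn->imm carries the low 32 bits and (insn + 1)->imm
         * the high 32 bits, e.g. BPF_LD_IMM64(BPF_REG_1, 0x1234567890abcdefULL)
         * (register and value are illustrative). With src_reg == BPF_PSEUDO_MAP_FD
         * the immediate originally held a map fd, which
         * replace_map_fd_with_map_ptr() has already turned into the map pointer
         * read back by ld_imm64_to_map_ptr().
         */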
   5016 
   5017 static bool may_access_skb(enum bpf_prog_type type)
   5018 {
   5019 	switch (type) {
   5020 	case BPF_PROG_TYPE_SOCKET_FILTER:
   5021 	case BPF_PROG_TYPE_SCHED_CLS:
   5022 	case BPF_PROG_TYPE_SCHED_ACT:
   5023 		return true;
   5024 	default:
   5025 		return false;
   5026 	}
   5027 }
   5028 
   5029 /* verify safety of LD_ABS|LD_IND instructions:
   5030  * - they can only appear in the programs where ctx == skb
   5031  * - since they are wrappers of function calls, they scratch R1-R5 registers,
   5032  *   preserve R6-R9, and store return value into R0
   5033  *
   5034  * Implicit input:
   5035  *   ctx == skb == R6 == CTX
   5036  *
   5037  * Explicit input:
   5038  *   SRC == any register
   5039  *   IMM == 32-bit immediate
   5040  *
   5041  * Output:
   5042  *   R0 - 8/16/32-bit skb data converted to cpu endianness
   5043  */
   5044 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
   5045 {
   5046 	struct bpf_reg_state *regs = cur_regs(env);
   5047 	u8 mode = BPF_MODE(insn->code);
   5048 	int i, err;
   5049 
   5050 	if (!may_access_skb(env->prog->type)) {
   5051 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
   5052 		return -EINVAL;
   5053 	}
   5054 
   5055 	if (!env->ops->gen_ld_abs) {
   5056 		verbose(env, "bpf verifier is misconfigured\n");
   5057 		return -EINVAL;
   5058 	}
   5059 
   5060 	if (env->subprog_cnt > 1) {
   5061 		/* when a program has an LD_ABS insn, JITs and the interpreter
   5062 		 * assume that r1 == ctx == skb, which is not the case for
   5063 		 * callees, which can have arbitrary arguments. It's problematic
   5064 		 * for the main prog as well, since JITs would need to analyze
   5065 		 * all functions in order to make proper register save/restore
   5066 		 * decisions in the main prog. Hence disallow LD_ABS with calls.
   5067 		 */
   5068 		verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
   5069 		return -EINVAL;
   5070 	}
   5071 
   5072 	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
   5073 	    BPF_SIZE(insn->code) == BPF_DW ||
   5074 	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
   5075 		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
   5076 		return -EINVAL;
   5077 	}
   5078 
   5079 	/* check whether implicit source operand (register R6) is readable */
   5080 	err = check_reg_arg(env, BPF_REG_6, SRC_OP);
   5081 	if (err)
   5082 		return err;
   5083 
   5084 	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
   5085 	 * gen_ld_abs() may terminate the program at runtime, leading to
   5086 	 * reference leak.
   5087 	 */
   5088 	err = check_reference_leak(env);
   5089 	if (err) {
   5090 		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
   5091 		return err;
   5092 	}
   5093 
   5094 	if (env->cur_state->active_spin_lock) {
   5095 		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
   5096 		return -EINVAL;
   5097 	}
   5098 
   5099 	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
   5100 		verbose(env,
   5101 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
   5102 		return -EINVAL;
   5103 	}
   5104 
   5105 	if (mode == BPF_IND) {
   5106 		/* check explicit source operand */
   5107 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
   5108 		if (err)
   5109 			return err;
   5110 	}
   5111 
   5112 	/* reset caller saved regs to unreadable */
   5113 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
   5114 		mark_reg_not_init(env, regs, caller_saved[i]);
   5115 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
   5116 	}
   5117 
   5118 	/* mark destination R0 register as readable, since it contains
   5119 	 * the value fetched from the packet.
   5120 	 * Already marked as written above.
   5121 	 */
   5122 	mark_reg_unknown(env, regs, BPF_REG_0);
   5123 	return 0;
   5124 }
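
        /* Example of what this allows in a socket filter (the offset is
         * illustrative):
         *    BPF_LD_ABS(BPF_B, 23)
         * loads one byte at offset 23 of the linear skb data into R0 (for an
         * IPv4 packet on Ethernet this is the protocol field), implicitly
         * using R6 as the skb pointer and scratching R1-R5 as described above.
         */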
   5125 
   5126 static int check_return_code(struct bpf_verifier_env *env)
   5127 {
   5128 	struct bpf_reg_state *reg;
   5129 	struct tnum range = tnum_range(0, 1);
   5130 
   5131 	switch (env->prog->type) {
   5132 	case BPF_PROG_TYPE_CGROUP_SKB:
   5133 	case BPF_PROG_TYPE_CGROUP_SOCK:
   5134 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
   5135 	case BPF_PROG_TYPE_SOCK_OPS:
   5136 	case BPF_PROG_TYPE_CGROUP_DEVICE:
   5137 		break;
   5138 	default:
   5139 		return 0;
   5140 	}
   5141 
   5142 	reg = cur_regs(env) + BPF_REG_0;
   5143 	if (reg->type != SCALAR_VALUE) {
   5144 		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
   5145 			reg_type_str[reg->type]);
   5146 		return -EINVAL;
   5147 	}
   5148 
   5149 	if (!tnum_in(range, reg->var_off)) {
   5150 		verbose(env, "At program exit the register R0 ");
   5151 		if (!tnum_is_unknown(reg->var_off)) {
   5152 			char tn_buf[48];
   5153 
   5154 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   5155 			verbose(env, "has value %s", tn_buf);
   5156 		} else {
   5157 			verbose(env, "has unknown scalar value");
   5158 		}
   5159 		verbose(env, " should have been 0 or 1\n");
   5160 		return -EINVAL;
   5161 	}
   5162 	return 0;
   5163 }
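
        /* Example: a BPF_PROG_TYPE_CGROUP_SKB program must exit with R0 a
         * SCALAR_VALUE whose var_off fits in tnum_range(0, 1), e.g. by ending
         * with
         *    BPF_MOV64_IMM(BPF_REG_0, 1),
         *    BPF_EXIT_INSN(),
         * whereas exiting with R0 == 2 or with an unbounded scalar trips the
         * checks above.
         */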
   5164 
   5165 /* non-recursive DFS pseudo code
   5166  * 1  procedure DFS-iterative(G,v):
   5167  * 2      label v as discovered
   5168  * 3      let S be a stack
   5169  * 4      S.push(v)
   5170  * 5      while S is not empty
   5171  * 6            t <- S.pop()
   5172  * 7            if t is what we're looking for:
   5173  * 8                return t
   5174  * 9            for all edges e in G.adjacentEdges(t) do
   5175  * 10               if edge e is already labelled
   5176  * 11                   continue with the next edge
   5177  * 12               w <- G.adjacentVertex(t,e)
   5178  * 13               if vertex w is not discovered and not explored
   5179  * 14                   label e as tree-edge
   5180  * 15                   label w as discovered
   5181  * 16                   S.push(w)
   5182  * 17                   continue at 5
   5183  * 18               else if vertex w is discovered
   5184  * 19                   label e as back-edge
   5185  * 20               else
   5186  * 21                   // vertex w is explored
   5187  * 22                   label e as forward- or cross-edge
   5188  * 23           label t as explored
   5189  * 24           S.pop()
   5190  *
   5191  * convention:
   5192  * 0x10 - discovered
   5193  * 0x11 - discovered and fall-through edge labelled
   5194  * 0x12 - discovered and fall-through and branch edges labelled
   5195  * 0x20 - explored
   5196  */
   5197 
   5198 enum {
   5199 	DISCOVERED = 0x10,
   5200 	EXPLORED = 0x20,
   5201 	FALLTHROUGH = 1,
   5202 	BRANCH = 2,
   5203 };
   5204 
   5205 #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
   5206 
   5207 static int *insn_stack;	/* stack of insns to process */
   5208 static int cur_stack;	/* current stack index */
   5209 static int *insn_state;
   5210 
   5211 /* t, w, e - match pseudo-code above:
   5212  * t - index of current instruction
   5213  * w - next instruction
   5214  * e - edge
   5215  */
   5216 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
   5217 {
   5218 	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
   5219 		return 0;
   5220 
   5221 	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
   5222 		return 0;
   5223 
   5224 	if (w < 0 || w >= env->prog->len) {
   5225 		verbose_linfo(env, t, "%d: ", t);
   5226 		verbose(env, "jump out of range from insn %d to %d\n", t, w);
   5227 		return -EINVAL;
   5228 	}
   5229 
   5230 	if (e == BRANCH)
   5231 		/* mark branch target for state pruning */
   5232 		env->explored_states[w] = STATE_LIST_MARK;
   5233 
   5234 	if (insn_state[w] == 0) {
   5235 		/* tree-edge */
   5236 		insn_state[t] = DISCOVERED | e;
   5237 		insn_state[w] = DISCOVERED;
   5238 		if (cur_stack >= env->prog->len)
   5239 			return -E2BIG;
   5240 		insn_stack[cur_stack++] = w;
   5241 		return 1;
   5242 	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
   5243 		verbose_linfo(env, t, "%d: ", t);
   5244 		verbose_linfo(env, w, "%d: ", w);
   5245 		verbose(env, "back-edge from insn %d to %d\n", t, w);
   5246 		return -EINVAL;
   5247 	} else if (insn_state[w] == EXPLORED) {
   5248 		/* forward- or cross-edge */
   5249 		insn_state[t] = DISCOVERED | e;
   5250 	} else {
   5251 		verbose(env, "insn state internal bug\n");
   5252 		return -EFAULT;
   5253 	}
   5254 	return 0;
   5255 }
   5256 
   5257 /* non-recursive depth-first-search to detect loops in BPF program
   5258  * loop == back-edge in directed graph
   5259  */
   5260 static int check_cfg(struct bpf_verifier_env *env)
   5261 {
   5262 	struct bpf_insn *insns = env->prog->insnsi;
   5263 	int insn_cnt = env->prog->len;
   5264 	int ret = 0;
   5265 	int i, t;
   5266 
   5267 	insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
   5268 	if (!insn_state)
   5269 		return -ENOMEM;
   5270 
   5271 	insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
   5272 	if (!insn_stack) {
   5273 		kfree(insn_state);
   5274 		return -ENOMEM;
   5275 	}
   5276 
   5277 	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
   5278 	insn_stack[0] = 0; /* 0 is the first instruction */
   5279 	cur_stack = 1;
   5280 
   5281 peek_stack:
   5282 	if (cur_stack == 0)
   5283 		goto check_state;
   5284 	t = insn_stack[cur_stack - 1];
   5285 
   5286 	if (BPF_CLASS(insns[t].code) == BPF_JMP ||
   5287 	    BPF_CLASS(insns[t].code) == BPF_JMP32) {
   5288 		u8 opcode = BPF_OP(insns[t].code);
   5289 
   5290 		if (opcode == BPF_EXIT) {
   5291 			goto mark_explored;
   5292 		} else if (opcode == BPF_CALL) {
   5293 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
   5294 			if (ret == 1)
   5295 				goto peek_stack;
   5296 			else if (ret < 0)
   5297 				goto err_free;
   5298 			if (t + 1 < insn_cnt)
   5299 				env->explored_states[t + 1] = STATE_LIST_MARK;
   5300 			if (insns[t].src_reg == BPF_PSEUDO_CALL) {
   5301 				env->explored_states[t] = STATE_LIST_MARK;
   5302 				ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
   5303 				if (ret == 1)
   5304 					goto peek_stack;
   5305 				else if (ret < 0)
   5306 					goto err_free;
   5307 			}
   5308 		} else if (opcode == BPF_JA) {
   5309 			if (BPF_SRC(insns[t].code) != BPF_K) {
   5310 				ret = -EINVAL;
   5311 				goto err_free;
   5312 			}
   5313 			/* unconditional jump with single edge */
   5314 			ret = push_insn(t, t + insns[t].off + 1,
   5315 					FALLTHROUGH, env);
   5316 			if (ret == 1)
   5317 				goto peek_stack;
   5318 			else if (ret < 0)
   5319 				goto err_free;
   5320 			/* tell verifier to check for equivalent states
   5321 			 * after every call and jump
   5322 			 */
   5323 			if (t + 1 < insn_cnt)
   5324 				env->explored_states[t + 1] = STATE_LIST_MARK;
   5325 		} else {
   5326 			/* conditional jump with two edges */
   5327 			env->explored_states[t] = STATE_LIST_MARK;
   5328 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
   5329 			if (ret == 1)
   5330 				goto peek_stack;
   5331 			else if (ret < 0)
   5332 				goto err_free;
   5333 
   5334 			ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
   5335 			if (ret == 1)
   5336 				goto peek_stack;
   5337 			else if (ret < 0)
   5338 				goto err_free;
   5339 		}
   5340 	} else {
   5341 		/* all other non-branch instructions with single
   5342 		 * fall-through edge
   5343 		 */
   5344 		ret = push_insn(t, t + 1, FALLTHROUGH, env);
   5345 		if (ret == 1)
   5346 			goto peek_stack;
   5347 		else if (ret < 0)
   5348 			goto err_free;
   5349 	}
   5350 
   5351 mark_explored:
   5352 	insn_state[t] = EXPLORED;
   5353 	if (cur_stack-- <= 0) {
   5354 		verbose(env, "pop stack internal bug\n");
   5355 		ret = -EFAULT;
   5356 		goto err_free;
   5357 	}
   5358 	goto peek_stack;
   5359 
   5360 check_state:
   5361 	for (i = 0; i < insn_cnt; i++) {
   5362 		if (insn_state[i] != EXPLORED) {
   5363 			verbose(env, "unreachable insn %d\n", i);
   5364 			ret = -EINVAL;
   5365 			goto err_free;
   5366 		}
   5367 	}
   5368 	ret = 0; /* cfg looks good */
   5369 
   5370 err_free:
   5371 	kfree(insn_state);
   5372 	kfree(insn_stack);
   5373 	return ret;
   5374 }
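
        /* Example of a program rejected here (insn indexes are illustrative):
         *    0: BPF_MOV64_IMM(BPF_REG_0, 0),
         *    1: BPF_JMP_IMM(BPF_JA, 0, 0, -1),
         *    2: BPF_EXIT_INSN(),
         * insn 1 jumps back to itself, so push_insn() finds the target still
         * DISCOVERED and reports "back-edge from insn 1 to 1".
         */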
   5375 
   5376 /* The minimum supported BTF func info size */
   5377 #define MIN_BPF_FUNCINFO_SIZE	8
   5378 #define MAX_FUNCINFO_REC_SIZE	252
   5379 
   5380 static int check_btf_func(struct bpf_verifier_env *env,
   5381 			  const union bpf_attr *attr,
   5382 			  union bpf_attr __user *uattr)
   5383 {
   5384 	u32 i, nfuncs, urec_size, min_size;
   5385 	u32 krec_size = sizeof(struct bpf_func_info);
   5386 	struct bpf_func_info *krecord;
   5387 	const struct btf_type *type;
   5388 	struct bpf_prog *prog;
   5389 	const struct btf *btf;
   5390 	void __user *urecord;
   5391 	u32 prev_offset = 0;
   5392 	int ret = 0;
   5393 
   5394 	nfuncs = attr->func_info_cnt;
   5395 	if (!nfuncs)
   5396 		return 0;
   5397 
   5398 	if (nfuncs != env->subprog_cnt) {
   5399 		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
   5400 		return -EINVAL;
   5401 	}
   5402 
   5403 	urec_size = attr->func_info_rec_size;
   5404 	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
   5405 	    urec_size > MAX_FUNCINFO_REC_SIZE ||
   5406 	    urec_size % sizeof(u32)) {
   5407 		verbose(env, "invalid func info rec size %u\n", urec_size);
   5408 		return -EINVAL;
   5409 	}
   5410 
   5411 	prog = env->prog;
   5412 	btf = prog->aux->btf;
   5413 
   5414 	urecord = u64_to_user_ptr(attr->func_info);
   5415 	min_size = min_t(u32, krec_size, urec_size);
   5416 
   5417 	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
   5418 	if (!krecord)
   5419 		return -ENOMEM;
   5420 
   5421 	for (i = 0; i < nfuncs; i++) {
   5422 		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
   5423 		if (ret) {
   5424 			if (ret == -E2BIG) {
   5425 				verbose(env, "nonzero trailing record in func info");
   5426 				/* set the size kernel expects so loader can zero
   5427 				 * out the rest of the record.
   5428 				 */
   5429 				if (put_user(min_size, &uattr->func_info_rec_size))
   5430 					ret = -EFAULT;
   5431 			}
   5432 			goto err_free;
   5433 		}
   5434 
   5435 		if (copy_from_user(&krecord[i], urecord, min_size)) {
   5436 			ret = -EFAULT;
   5437 			goto err_free;
   5438 		}
   5439 
   5440 		/* check insn_off */
   5441 		if (i == 0) {
   5442 			if (krecord[i].insn_off) {
   5443 				verbose(env,
   5444 					"nonzero insn_off %u for the first func info record",
   5445 					krecord[i].insn_off);
   5446 				ret = -EINVAL;
   5447 				goto err_free;
   5448 			}
   5449 		} else if (krecord[i].insn_off <= prev_offset) {
   5450 			verbose(env,
   5451 				"same or smaller insn offset (%u) than previous func info record (%u)",
   5452 				krecord[i].insn_off, prev_offset);
   5453 			ret = -EINVAL;
   5454 			goto err_free;
   5455 		}
   5456 
   5457 		if (env->subprog_info[i].start != krecord[i].insn_off) {
   5458 			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
   5459 			ret = -EINVAL;
   5460 			goto err_free;
   5461 		}
   5462 
   5463 		/* check type_id */
   5464 		type = btf_type_by_id(btf, krecord[i].type_id);
   5465 		if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
   5466 			verbose(env, "invalid type id %d in func info",
   5467 				krecord[i].type_id);
   5468 			ret = -EINVAL;
   5469 			goto err_free;
   5470 		}
   5471 
   5472 		prev_offset = krecord[i].insn_off;
   5473 		urecord += urec_size;
   5474 	}
   5475 
   5476 	prog->aux->func_info = krecord;
   5477 	prog->aux->func_info_cnt = nfuncs;
   5478 	return 0;
   5479 
   5480 err_free:
   5481 	kvfree(krecord);
   5482 	return ret;
   5483 }
   5484 
   5485 static void adjust_btf_func(struct bpf_verifier_env *env)
   5486 {
   5487 	int i;
   5488 
   5489 	if (!env->prog->aux->func_info)
   5490 		return;
   5491 
   5492 	for (i = 0; i < env->subprog_cnt; i++)
   5493 		env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
   5494 }
   5495 
   5496 #define MIN_BPF_LINEINFO_SIZE	(offsetof(struct bpf_line_info, line_col) + \
   5497 		sizeof(((struct bpf_line_info *)(0))->line_col))
   5498 #define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
   5499 
   5500 static int check_btf_line(struct bpf_verifier_env *env,
   5501 			  const union bpf_attr *attr,
   5502 			  union bpf_attr __user *uattr)
   5503 {
   5504 	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
   5505 	struct bpf_subprog_info *sub;
   5506 	struct bpf_line_info *linfo;
   5507 	struct bpf_prog *prog;
   5508 	const struct btf *btf;
   5509 	void __user *ulinfo;
   5510 	int err;
   5511 
   5512 	nr_linfo = attr->line_info_cnt;
   5513 	if (!nr_linfo)
   5514 		return 0;
   5515 
   5516 	rec_size = attr->line_info_rec_size;
   5517 	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
   5518 	    rec_size > MAX_LINEINFO_REC_SIZE ||
   5519 	    rec_size & (sizeof(u32) - 1))
   5520 		return -EINVAL;
   5521 
   5522 	/* Need to zero it in case userspace passes
   5523 	 * in a smaller bpf_line_info object.
   5524 	 */
   5525 	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
   5526 			 GFP_KERNEL | __GFP_NOWARN);
   5527 	if (!linfo)
   5528 		return -ENOMEM;
   5529 
   5530 	prog = env->prog;
   5531 	btf = prog->aux->btf;
   5532 
   5533 	s = 0;
   5534 	sub = env->subprog_info;
   5535 	ulinfo = u64_to_user_ptr(attr->line_info);
   5536 	expected_size = sizeof(struct bpf_line_info);
   5537 	ncopy = min_t(u32, expected_size, rec_size);
   5538 	for (i = 0; i < nr_linfo; i++) {
   5539 		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
   5540 		if (err) {
   5541 			if (err == -E2BIG) {
   5542 				verbose(env, "nonzero trailing record in line_info");
   5543 				if (put_user(expected_size,
   5544 					     &uattr->line_info_rec_size))
   5545 					err = -EFAULT;
   5546 			}
   5547 			goto err_free;
   5548 		}
   5549 
   5550 		if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
   5551 			err = -EFAULT;
   5552 			goto err_free;
   5553 		}
   5554 
   5555 		/*
   5556 		 * Check insn_off to ensure
   5557 		 * 1) strictly increasing AND
   5558 		 * 2) bounded by prog->len
   5559 		 *
   5560 		 * The linfo[0].insn_off == 0 check logically falls into
   5561 		 * the later "missing bpf_line_info for func..." case
   5562 		 * because linfo[0].insn_off must also belong to the
   5563 		 * first subprog, and the first subprog must have
   5564 		 * subprog_info[0].start == 0.
   5565 		 */
   5566 		if ((i && linfo[i].insn_off <= prev_offset) ||
   5567 		    linfo[i].insn_off >= prog->len) {
   5568 			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
   5569 				i, linfo[i].insn_off, prev_offset,
   5570 				prog->len);
   5571 			err = -EINVAL;
   5572 			goto err_free;
   5573 		}
   5574 
   5575 		if (!prog->insnsi[linfo[i].insn_off].code) {
   5576 			verbose(env,
   5577 				"Invalid insn code at line_info[%u].insn_off\n",
   5578 				i);
   5579 			err = -EINVAL;
   5580 			goto err_free;
   5581 		}
   5582 
   5583 		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
   5584 		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
   5585 			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
   5586 			err = -EINVAL;
   5587 			goto err_free;
   5588 		}
   5589 
   5590 		if (s != env->subprog_cnt) {
   5591 			if (linfo[i].insn_off == sub[s].start) {
   5592 				sub[s].linfo_idx = i;
   5593 				s++;
   5594 			} else if (sub[s].start < linfo[i].insn_off) {
   5595 				verbose(env, "missing bpf_line_info for func#%u\n", s);
   5596 				err = -EINVAL;
   5597 				goto err_free;
   5598 			}
   5599 		}
   5600 
   5601 		prev_offset = linfo[i].insn_off;
   5602 		ulinfo += rec_size;
   5603 	}
   5604 
   5605 	if (s != env->subprog_cnt) {
   5606 		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
   5607 			env->subprog_cnt - s, s);
   5608 		err = -EINVAL;
   5609 		goto err_free;
   5610 	}
   5611 
   5612 	prog->aux->linfo = linfo;
   5613 	prog->aux->nr_linfo = nr_linfo;
   5614 
   5615 	return 0;
   5616 
   5617 err_free:
   5618 	kvfree(linfo);
   5619 	return err;
   5620 }
   5621 
   5622 static int check_btf_info(struct bpf_verifier_env *env,
   5623 			  const union bpf_attr *attr,
   5624 			  union bpf_attr __user *uattr)
   5625 {
   5626 	struct btf *btf;
   5627 	int err;
   5628 
   5629 	if (!attr->func_info_cnt && !attr->line_info_cnt)
   5630 		return 0;
   5631 
   5632 	btf = btf_get_by_fd(attr->prog_btf_fd);
   5633 	if (IS_ERR(btf))
   5634 		return PTR_ERR(btf);
   5635 	env->prog->aux->btf = btf;
   5636 
   5637 	err = check_btf_func(env, attr, uattr);
   5638 	if (err)
   5639 		return err;
   5640 
   5641 	err = check_btf_line(env, attr, uattr);
   5642 	if (err)
   5643 		return err;
   5644 
   5645 	return 0;
   5646 }
   5647 
   5648 /* check %cur's range satisfies %old's */
   5649 static bool range_within(struct bpf_reg_state *old,
   5650 			 struct bpf_reg_state *cur)
   5651 {
   5652 	return old->umin_value <= cur->umin_value &&
   5653 	       old->umax_value >= cur->umax_value &&
   5654 	       old->smin_value <= cur->smin_value &&
   5655 	       old->smax_value >= cur->smax_value;
   5656 }
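
        /* e.g. an old state with bounds [2, 10] covers a current state with
         * bounds [3, 8], so range_within() returns true; with the ranges
         * swapped it would return false, since the current state could then
         * reach values the old (already verified) state never had to account
         * for.
         */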
   5657 
   5658 /* Maximum number of register states that can exist at once */
   5659 #define ID_MAP_SIZE	(MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
   5660 struct idpair {
   5661 	u32 old;
   5662 	u32 cur;
   5663 };
   5664 
   5665 /* If in the old state two registers had the same id, then they need to have
   5666  * the same id in the new state as well.  But that id could be different from
   5667  * the old state, so we need to track the mapping from old to new ids.
   5668  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
   5669  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
   5670  * regs with a different old id could still have new id 9, we don't care about
   5671  * that.
   5672  * So we look through our idmap to see if this old id has been seen before.  If
   5673  * so, we require the new id to match; otherwise, we add the id pair to the map.
   5674  */
   5675 static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
   5676 {
   5677 	unsigned int i;
   5678 
   5679 	for (i = 0; i < ID_MAP_SIZE; i++) {
   5680 		if (!idmap[i].old) {
   5681 			/* Reached an empty slot; haven't seen this id before */
   5682 			idmap[i].old = old_id;
   5683 			idmap[i].cur = cur_id;
   5684 			return true;
   5685 		}
   5686 		if (idmap[i].old == old_id)
   5687 			return idmap[i].cur == cur_id;
   5688 	}
   5689 	/* We ran out of idmap slots, which should be impossible */
   5690 	WARN_ON_ONCE(1);
   5691 	return false;
   5692 }
   5693 
   5694 static void clean_func_state(struct bpf_verifier_env *env,
   5695 			     struct bpf_func_state *st)
   5696 {
   5697 	enum bpf_reg_liveness live;
   5698 	int i, j;
   5699 
   5700 	for (i = 0; i < BPF_REG_FP; i++) {
   5701 		live = st->regs[i].live;
   5702 		/* liveness must not touch this register anymore */
   5703 		st->regs[i].live |= REG_LIVE_DONE;
   5704 		if (!(live & REG_LIVE_READ))
   5705 			/* since the register is unused, clear its state
   5706 			 * to make further comparison simpler
   5707 			 */
   5708 			__mark_reg_not_init(&st->regs[i]);
   5709 	}
   5710 
   5711 	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
   5712 		live = st->stack[i].spilled_ptr.live;
   5713 		/* liveness must not touch this stack slot anymore */
   5714 		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
   5715 		if (!(live & REG_LIVE_READ)) {
   5716 			__mark_reg_not_init(&st->stack[i].spilled_ptr);
   5717 			for (j = 0; j < BPF_REG_SIZE; j++)
   5718 				st->stack[i].slot_type[j] = STACK_INVALID;
   5719 		}
   5720 	}
   5721 }
   5722 
   5723 static void clean_verifier_state(struct bpf_verifier_env *env,
   5724 				 struct bpf_verifier_state *st)
   5725 {
   5726 	int i;
   5727 
   5728 	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
   5729 		/* all regs in this state in all frames were already marked */
   5730 		return;
   5731 
   5732 	for (i = 0; i <= st->curframe; i++)
   5733 		clean_func_state(env, st->frame[i]);
   5734 }
   5735 
   5736 /* the parentage chains form a tree.
   5737  * the verifier states are added to state lists at given insn and
   5738  * pushed into state stack for future exploration.
   5739  * when the verifier reaches bpf_exit insn some of the verifier states
   5740  * stored in the state lists have their final liveness state already,
   5741  * but a lot of states will get revised from liveness point of view when
   5742  * the verifier explores other branches.
   5743  * Example:
   5744  * 1: r0 = 1
   5745  * 2: if r1 == 100 goto pc+1
   5746  * 3: r0 = 2
   5747  * 4: exit
   5748  * when the verifier reaches exit insn the register r0 in the state list of
   5749  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
   5750  * of insn 2 and goes exploring further. At the insn 4 it will walk the
   5751  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
   5752  *
   5753  * Since the verifier pushes the branch states as it sees them while exploring
   5754  * the program, the condition of walking the branch instruction for the second
   5755  * time means that all states below this branch were already explored and
   5756  * their final liveness marks are already propagated.
   5757  * Hence when the verifier completes the search of state list in is_state_visited()
   5758  * we can call this clean_live_states() function to mark all liveness states
   5759  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
   5760  * will not be used.
   5761  * This function also clears the registers and stack slots that were never
   5762  * read (!REG_LIVE_READ) to simplify state merging.
   5763  *
   5764  * An important note here: walking the same branch instruction in the callee
   5765  * doesn't mean that the states are DONE. The verifier has to compare
   5766  * the callsites.
   5767  */
   5768 static void clean_live_states(struct bpf_verifier_env *env, int insn,
   5769 			      struct bpf_verifier_state *cur)
   5770 {
   5771 	struct bpf_verifier_state_list *sl;
   5772 	int i;
   5773 
   5774 	sl = env->explored_states[insn];
   5775 	if (!sl)
   5776 		return;
   5777 
   5778 	while (sl != STATE_LIST_MARK) {
   5779 		if (sl->state.curframe != cur->curframe)
   5780 			goto next;
   5781 		for (i = 0; i <= cur->curframe; i++)
   5782 			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
   5783 				goto next;
   5784 		clean_verifier_state(env, &sl->state);
   5785 next:
   5786 		sl = sl->next;
   5787 	}
   5788 }
   5789 
   5790 /* Returns true if (rold safe implies rcur safe) */
   5791 static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
   5792 		    struct idpair *idmap)
   5793 {
   5794 	bool equal;
   5795 
   5796 	if (!(rold->live & REG_LIVE_READ))
   5797 		/* explored state didn't use this */
   5798 		return true;
   5799 
   5800 	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
   5801 
   5802 	if (rold->type == PTR_TO_STACK)
   5803 		/* two stack pointers are equal only if they're pointing to
   5804 		 * the same stack frame, since fp-8 in foo != fp-8 in bar
   5805 		 */
   5806 		return equal && rold->frameno == rcur->frameno;
   5807 
   5808 	if (equal)
   5809 		return true;
   5810 
   5811 	if (rold->type == NOT_INIT)
   5812 		/* explored state can't have used this */
   5813 		return true;
   5814 	if (rcur->type == NOT_INIT)
   5815 		return false;
   5816 	switch (rold->type) {
   5817 	case SCALAR_VALUE:
   5818 		if (rcur->type == SCALAR_VALUE) {
   5819 			/* new val must satisfy old val knowledge */
   5820 			return range_within(rold, rcur) &&
   5821 			       tnum_in(rold->var_off, rcur->var_off);
   5822 		} else {
   5823 			/* We're trying to use a pointer in place of a scalar.
   5824 			 * Even if the scalar was unbounded, this could lead to
   5825 			 * pointer leaks because scalars are allowed to leak
   5826 			 * while pointers are not. We could make this safe in
   5827 			 * special cases if root is calling us, but it's
   5828 			 * probably not worth the hassle.
   5829 			 */
   5830 			return false;
   5831 		}
   5832 	case PTR_TO_MAP_VALUE:
   5833 		/* If the new min/max/var_off satisfy the old ones and
   5834 		 * everything else matches, we are OK.
   5835 		 * 'id' is not compared, since it's only used for maps with
   5836 		 * bpf_spin_lock inside map element and in such cases if
   5837 		 * the rest of the prog is valid for one map element then
   5838 		 * it's valid for all map elements regardless of the key
   5839 		 * used in bpf_map_lookup()
   5840 		 */
   5841 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
   5842 		       range_within(rold, rcur) &&
   5843 		       tnum_in(rold->var_off, rcur->var_off);
   5844 	case PTR_TO_MAP_VALUE_OR_NULL:
   5845 		/* a PTR_TO_MAP_VALUE could be safe to use as a
   5846 		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
   5847 		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
   5848 		 * checked, doing so could have affected others with the same
   5849 		 * id, and we can't check for that because we lost the id when
   5850 		 * we converted to a PTR_TO_MAP_VALUE.
   5851 		 */
   5852 		if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
   5853 			return false;
   5854 		if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
   5855 			return false;
   5856 		/* Check our ids match any regs they're supposed to */
   5857 		return check_ids(rold->id, rcur->id, idmap);
   5858 	case PTR_TO_PACKET_META:
   5859 	case PTR_TO_PACKET:
   5860 		if (rcur->type != rold->type)
   5861 			return false;
   5862 		/* We must have at least as much range as the old ptr
   5863 		 * did, so that any accesses which were safe before are
   5864 		 * still safe.  This is true even if old range < old off,
   5865 		 * since someone could have accessed through (ptr - k), or
   5866 		 * even done ptr -= k in a register, to get a safe access.
   5867 		 */
   5868 		if (rold->range > rcur->range)
   5869 			return false;
   5870 		/* If the offsets don't match, we can't trust our alignment;
   5871 		 * nor can we be sure that we won't fall out of range.
   5872 		 */
   5873 		if (rold->off != rcur->off)
   5874 			return false;
   5875 		/* id relations must be preserved */
   5876 		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
   5877 			return false;
   5878 		/* new val must satisfy old val knowledge */
   5879 		return range_within(rold, rcur) &&
   5880 		       tnum_in(rold->var_off, rcur->var_off);
   5881 	case PTR_TO_CTX:
   5882 	case CONST_PTR_TO_MAP:
   5883 	case PTR_TO_PACKET_END:
   5884 	case PTR_TO_FLOW_KEYS:
   5885 	case PTR_TO_SOCKET:
   5886 	case PTR_TO_SOCKET_OR_NULL:
   5887 	case PTR_TO_SOCK_COMMON:
   5888 	case PTR_TO_SOCK_COMMON_OR_NULL:
   5889 	case PTR_TO_TCP_SOCK:
   5890 	case PTR_TO_TCP_SOCK_OR_NULL:
   5891 		/* Only valid matches are exact, which memcmp() above
   5892 		 * would have accepted
   5893 		 */
   5894 	default:
   5895 		/* Don't know what's going on, just say it's not safe */
   5896 		return false;
   5897 	}
   5898 
   5899 	/* Shouldn't get here; if we do, say it's not safe */
   5900 	WARN_ON_ONCE(1);
   5901 	return false;
   5902 }
   5903 
   5904 static bool stacksafe(struct bpf_func_state *old,
   5905 		      struct bpf_func_state *cur,
   5906 		      struct idpair *idmap)
   5907 {
   5908 	int i, spi;
   5909 
   5910 	/* walk slots of the explored stack and ignore any additional
   5911 	 * slots in the current stack, since explored(safe) state
   5912 	 * didn't use them
   5913 	 */
   5914 	for (i = 0; i < old->allocated_stack; i++) {
   5915 		spi = i / BPF_REG_SIZE;
   5916 
   5917 		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
   5918 			i += BPF_REG_SIZE - 1;
   5919 			/* explored state didn't use this */
   5920 			continue;
   5921 		}
   5922 
   5923 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
   5924 			continue;
   5925 
   5926 		/* explored stack has more populated slots than current stack
   5927 		 * and these slots were used
   5928 		 */
   5929 		if (i >= cur->allocated_stack)
   5930 			return false;
   5931 
   5932 		/* if old state was safe with misc data in the stack
   5933 		 * it will be safe with zero-initialized stack.
   5934 		 * The opposite is not true
   5935 		 */
   5936 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
   5937 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
   5938 			continue;
   5939 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
   5940 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
   5941 			/* Ex: old explored (safe) state has STACK_SPILL in
   5942 			 * this stack slot, but current has STACK_MISC ->
   5943 			 * these verifier states are not equivalent,
   5944 			 * return false to continue verification of this path
   5945 			 */
   5946 			return false;
   5947 		if (i % BPF_REG_SIZE)
   5948 			continue;
   5949 		if (old->stack[spi].slot_type[0] != STACK_SPILL)
   5950 			continue;
   5951 		if (!regsafe(&old->stack[spi].spilled_ptr,
   5952 			     &cur->stack[spi].spilled_ptr,
   5953 			     idmap))
   5954 			/* when the explored and current stack slots are both storing
   5955 			 * spilled registers, check that the stored pointer types
   5956 			 * are the same as well.
   5957 			 * Ex: explored safe path could have stored
   5958 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
   5959 			 * but current path has stored:
   5960 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
   5961 			 * such verifier states are not equivalent.
   5962 			 * return false to continue verification of this path
   5963 			 */
   5964 			return false;
   5965 	}
   5966 	return true;
   5967 }
   5968 
   5969 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
   5970 {
   5971 	if (old->acquired_refs != cur->acquired_refs)
   5972 		return false;
   5973 	return !memcmp(old->refs, cur->refs,
   5974 		       sizeof(*old->refs) * old->acquired_refs);
   5975 }
   5976 
   5977 /* compare two verifier states
   5978  *
   5979  * all states stored in state_list are known to be valid, since
   5980  * verifier reached 'bpf_exit' instruction through them
   5981  *
   5982  * this function is called when the verifier explores different branches of
   5983  * execution popped from the state stack. If it sees an old state that has
   5984  * a more strict register state and a more strict stack state, then this
   5985  * execution branch doesn't need to be explored further, since the verifier
   5986  * already concluded that the more strict state leads to a valid finish.
   5987  *
   5988  * Therefore two states are equivalent if register state is more conservative
   5989  * and explored stack state is more conservative than the current one.
   5990  * Example:
   5991  *       explored                   current
   5992  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
   5993  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
   5994  *
   5995  * In other words, if the current stack state (the one being explored) has more
   5996  * valid slots than the old one that already passed validation, it means
   5997  * the verifier can stop exploring and conclude that the current state is valid too
   5998  *
   5999  * Similarly with registers. If explored state has register type as invalid
   6000  * whereas register type in current state is meaningful, it means that
   6001  * the current state will reach 'bpf_exit' instruction safely
   6002  */
   6003 static bool func_states_equal(struct bpf_func_state *old,
   6004 			      struct bpf_func_state *cur)
   6005 {
   6006 	struct idpair *idmap;
   6007 	bool ret = false;
   6008 	int i;
   6009 
   6010 	idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
   6011 	/* If we failed to allocate the idmap, just say it's not safe */
   6012 	if (!idmap)
   6013 		return false;
   6014 
   6015 	for (i = 0; i < MAX_BPF_REG; i++) {
   6016 		if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
   6017 			goto out_free;
   6018 	}
   6019 
   6020 	if (!stacksafe(old, cur, idmap))
   6021 		goto out_free;
   6022 
   6023 	if (!refsafe(old, cur))
   6024 		goto out_free;
   6025 	ret = true;
   6026 out_free:
   6027 	kfree(idmap);
   6028 	return ret;
   6029 }
   6030 
   6031 static bool states_equal(struct bpf_verifier_env *env,
   6032 			 struct bpf_verifier_state *old,
   6033 			 struct bpf_verifier_state *cur)
   6034 {
   6035 	int i;
   6036 
   6037 	if (old->curframe != cur->curframe)
   6038 		return false;
   6039 
   6040 	/* Verification state from speculative execution simulation
   6041 	 * must never prune a non-speculative execution one.
   6042 	 */
   6043 	if (old->speculative && !cur->speculative)
   6044 		return false;
   6045 
   6046 	if (old->active_spin_lock != cur->active_spin_lock)
   6047 		return false;
   6048 
   6049 	/* for states to be equal callsites have to be the same
   6050 	 * and all frame states need to be equivalent
   6051 	 */
   6052 	for (i = 0; i <= old->curframe; i++) {
   6053 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
   6054 			return false;
   6055 		if (!func_states_equal(old->frame[i], cur->frame[i]))
   6056 			return false;
   6057 	}
   6058 	return true;
   6059 }
   6060 
   6061 /* A write screens off any subsequent reads; but write marks come from the
   6062  * straight-line code between a state and its parent.  When we arrive at an
   6063  * equivalent state (jump target or such) we didn't arrive by the straight-line
   6064  * code, so read marks in the state must propagate to the parent regardless
   6065  * of the state's write marks. That's what 'parent == state->parent' comparison
   6066  * in mark_reg_read() is for.
   6067  */
   6068 static int propagate_liveness(struct bpf_verifier_env *env,
   6069 			      const struct bpf_verifier_state *vstate,
   6070 			      struct bpf_verifier_state *vparent)
   6071 {
   6072 	int i, frame, err = 0;
   6073 	struct bpf_func_state *state, *parent;
   6074 
   6075 	if (vparent->curframe != vstate->curframe) {
   6076 		WARN(1, "propagate_live: parent frame %d current frame %d\n",
   6077 		     vparent->curframe, vstate->curframe);
   6078 		return -EFAULT;
   6079 	}
   6080 	/* Propagate read liveness of registers... */
   6081 	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
   6082 	for (frame = 0; frame <= vstate->curframe; frame++) {
   6083 		/* We don't need to worry about FP liveness, it's read-only */
   6084 		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
   6085 			if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ)
   6086 				continue;
   6087 			if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) {
   6088 				err = mark_reg_read(env, &vstate->frame[frame]->regs[i],
   6089 						    &vparent->frame[frame]->regs[i]);
   6090 				if (err)
   6091 					return err;
   6092 			}
   6093 		}
   6094 	}
   6095 
   6096 	/* ... and stack slots */
   6097 	for (frame = 0; frame <= vstate->curframe; frame++) {
   6098 		state = vstate->frame[frame];
   6099 		parent = vparent->frame[frame];
   6100 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
   6101 			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
   6102 			if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
   6103 				continue;
   6104 			if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
   6105 				mark_reg_read(env, &state->stack[i].spilled_ptr,
   6106 					      &parent->stack[i].spilled_ptr);
   6107 		}
   6108 	}
   6109 	return err;
   6110 }
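
/* For illustration only: why the copying above matters.  Suppose the
 * continuation past a join point (already walked when the matched state was
 * first explored) still reads r6, e.g. it ends with:
 *
 *    BPF_MOV64_REG(BPF_REG_0, BPF_REG_6),
 *    BPF_EXIT_INSN(),
 *
 * The pruned path never executes those insns itself, so without
 * propagate_liveness() its states would consider r6 dead, and a later, less
 * precise state could wrongly be found equivalent.  Copying the REG_LIVE_READ
 * marks from the matched explored state into the current one (and up its
 * parentage chain via mark_reg_read()) keeps pruning conservative.
 */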
   6111 
   6112 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
   6113 {
   6114 	struct bpf_verifier_state_list *new_sl;
   6115 	struct bpf_verifier_state_list *sl;
   6116 	struct bpf_verifier_state *cur = env->cur_state, *new;
   6117 	int i, j, err, states_cnt = 0;
   6118 
   6119 	sl = env->explored_states[insn_idx];
   6120 	if (!sl)
   6121 		/* this 'insn_idx' instruction wasn't marked, so we will not
   6122 		 * be doing state search here
   6123 		 */
   6124 		return 0;
   6125 
   6126 	clean_live_states(env, insn_idx, cur);
   6127 
   6128 	while (sl != STATE_LIST_MARK) {
   6129 		if (states_equal(env, &sl->state, cur)) {
   6130 			/* reached equivalent register/stack state,
   6131 			 * prune the search.
   6132 			 * Registers read by the continuation are read by us.
   6133 			 * If we have any write marks in env->cur_state, they
   6134 			 * will prevent corresponding reads in the continuation
   6135 			 * from reaching our parent (an explored_state).  Our
   6136 			 * own state will get the read marks recorded, but
   6137 			 * they'll be immediately forgotten as we're pruning
   6138 			 * this state and will pop a new one.
   6139 			 */
   6140 			err = propagate_liveness(env, &sl->state, cur);
   6141 			if (err)
   6142 				return err;
   6143 			return 1;
   6144 		}
   6145 		sl = sl->next;
   6146 		states_cnt++;
   6147 	}
   6148 
   6149 	if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
   6150 		return 0;
   6151 
    6152 	/* there were no equivalent states, remember the current one.
    6153 	 * technically the current state is not proven to be safe yet,
    6154 	 * but it will either reach the outermost bpf_exit (which means it's safe)
    6155 	 * or it will be rejected. Since there are no loops, we won't be
   6156 	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
   6157 	 * again on the way to bpf_exit
   6158 	 */
   6159 	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
   6160 	if (!new_sl)
   6161 		return -ENOMEM;
   6162 
   6163 	/* add new state to the head of linked list */
   6164 	new = &new_sl->state;
   6165 	err = copy_verifier_state(new, cur);
   6166 	if (err) {
   6167 		free_verifier_state(new, false);
   6168 		kfree(new_sl);
   6169 		return err;
   6170 	}
   6171 	new_sl->next = env->explored_states[insn_idx];
   6172 	env->explored_states[insn_idx] = new_sl;
   6173 	/* connect new state to parentage chain. Current frame needs all
   6174 	 * registers connected. Only r6 - r9 of the callers are alive (pushed
   6175 	 * to the stack implicitly by JITs) so in callers' frames connect just
   6176 	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
   6177 	 * the state of the call instruction (with WRITTEN set), and r0 comes
   6178 	 * from callee with its full parentage chain, anyway.
   6179 	 */
   6180 	for (j = 0; j <= cur->curframe; j++)
   6181 		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
   6182 			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
   6183 	/* clear write marks in current state: the writes we did are not writes
   6184 	 * our child did, so they don't screen off its reads from us.
   6185 	 * (There are no read marks in current state, because reads always mark
   6186 	 * their parent and current state never has children yet.  Only
   6187 	 * explored_states can get read marks.)
   6188 	 */
   6189 	for (i = 0; i < BPF_REG_FP; i++)
   6190 		cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
   6191 
   6192 	/* all stack frames are accessible from callee, clear them all */
   6193 	for (j = 0; j <= cur->curframe; j++) {
   6194 		struct bpf_func_state *frame = cur->frame[j];
   6195 		struct bpf_func_state *newframe = new->frame[j];
   6196 
   6197 		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
   6198 			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
   6199 			frame->stack[i].spilled_ptr.parent =
   6200 						&newframe->stack[i].spilled_ptr;
   6201 		}
   6202 	}
   6203 	return 0;
   6204 }
   6205 
   6206 /* Return true if it's OK to have the same insn return a different type. */
   6207 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
   6208 {
   6209 	switch (type) {
   6210 	case PTR_TO_CTX:
   6211 	case PTR_TO_SOCKET:
   6212 	case PTR_TO_SOCKET_OR_NULL:
   6213 	case PTR_TO_SOCK_COMMON:
   6214 	case PTR_TO_SOCK_COMMON_OR_NULL:
   6215 	case PTR_TO_TCP_SOCK:
   6216 	case PTR_TO_TCP_SOCK_OR_NULL:
   6217 		return false;
   6218 	default:
   6219 		return true;
   6220 	}
   6221 }
   6222 
   6223 /* If an instruction was previously used with particular pointer types, then we
   6224  * need to be careful to avoid cases such as the below, where it may be ok
   6225  * for one branch accessing the pointer, but not ok for the other branch:
   6226  *
   6227  * R1 = sock_ptr
   6228  * goto X;
   6229  * ...
   6230  * R1 = some_other_valid_ptr;
   6231  * goto X;
   6232  * ...
   6233  * R2 = *(u32 *)(R1 + 0);
   6234  */
   6235 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
   6236 {
   6237 	return src != prev && (!reg_type_mismatch_ok(src) ||
   6238 			       !reg_type_mismatch_ok(prev));
   6239 }
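
/* For illustration only, a hypothetical fragment (for, say, a socket filter,
 * where ctx offset 0 is __sk_buff->len) that do_check() rejects with
 * "same insn cannot be used with different pointers": the final load
 * dereferences R1 as ctx on one path and as stack on the other.
 *
 *    BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),           (init fp[-8])
 *    BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),    (r2 = ctx->len)
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2),          (skip the next two insns)
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),          (r1 = fp - 8, PTR_TO_STACK)
 *    BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),    (r1 is ctx or stack here)
 *    BPF_EXIT_INSN(),
 */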
   6240 
   6241 static int do_check(struct bpf_verifier_env *env)
   6242 {
   6243 	struct bpf_verifier_state *state;
   6244 	struct bpf_insn *insns = env->prog->insnsi;
   6245 	struct bpf_reg_state *regs;
   6246 	int insn_cnt = env->prog->len, i;
   6247 	int insn_processed = 0;
   6248 	bool do_print_state = false;
   6249 
   6250 	env->prev_linfo = NULL;
   6251 
   6252 	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
   6253 	if (!state)
   6254 		return -ENOMEM;
   6255 	state->curframe = 0;
   6256 	state->speculative = false;
   6257 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
   6258 	if (!state->frame[0]) {
   6259 		kfree(state);
   6260 		return -ENOMEM;
   6261 	}
   6262 	env->cur_state = state;
   6263 	init_func_state(env, state->frame[0],
   6264 			BPF_MAIN_FUNC /* callsite */,
   6265 			0 /* frameno */,
   6266 			0 /* subprogno, zero == main subprog */);
   6267 
   6268 	for (;;) {
   6269 		struct bpf_insn *insn;
   6270 		u8 class;
   6271 		int err;
   6272 
   6273 		if (env->insn_idx >= insn_cnt) {
   6274 			verbose(env, "invalid insn idx %d insn_cnt %d\n",
   6275 				env->insn_idx, insn_cnt);
   6276 			return -EFAULT;
   6277 		}
   6278 
   6279 		insn = &insns[env->insn_idx];
   6280 		class = BPF_CLASS(insn->code);
   6281 
   6282 		if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
   6283 			verbose(env,
   6284 				"BPF program is too large. Processed %d insn\n",
   6285 				insn_processed);
   6286 			return -E2BIG;
   6287 		}
   6288 
   6289 		err = is_state_visited(env, env->insn_idx);
   6290 		if (err < 0)
   6291 			return err;
   6292 		if (err == 1) {
   6293 			/* found equivalent state, can prune the search */
   6294 			if (env->log.level) {
   6295 				if (do_print_state)
   6296 					verbose(env, "\nfrom %d to %d%s: safe\n",
   6297 						env->prev_insn_idx, env->insn_idx,
   6298 						env->cur_state->speculative ?
   6299 						" (speculative execution)" : "");
   6300 				else
   6301 					verbose(env, "%d: safe\n", env->insn_idx);
   6302 			}
   6303 			goto process_bpf_exit;
   6304 		}
   6305 
   6306 		if (signal_pending(current))
   6307 			return -EAGAIN;
   6308 
   6309 		if (need_resched())
   6310 			cond_resched();
   6311 
   6312 		if (env->log.level > 1 || (env->log.level && do_print_state)) {
   6313 			if (env->log.level > 1)
   6314 				verbose(env, "%d:", env->insn_idx);
   6315 			else
   6316 				verbose(env, "\nfrom %d to %d%s:",
   6317 					env->prev_insn_idx, env->insn_idx,
   6318 					env->cur_state->speculative ?
   6319 					" (speculative execution)" : "");
   6320 			print_verifier_state(env, state->frame[state->curframe]);
   6321 			do_print_state = false;
   6322 		}
   6323 
   6324 		if (env->log.level) {
   6325 			const struct bpf_insn_cbs cbs = {
   6326 				.cb_print	= verbose,
   6327 				.private_data	= env,
   6328 			};
   6329 
   6330 			verbose_linfo(env, env->insn_idx, "; ");
   6331 			verbose(env, "%d: ", env->insn_idx);
   6332 			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
   6333 		}
   6334 
   6335 		if (bpf_prog_is_dev_bound(env->prog->aux)) {
   6336 			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
   6337 							   env->prev_insn_idx);
   6338 			if (err)
   6339 				return err;
   6340 		}
   6341 
   6342 		regs = cur_regs(env);
   6343 		env->insn_aux_data[env->insn_idx].seen = true;
   6344 
   6345 		if (class == BPF_ALU || class == BPF_ALU64) {
   6346 			err = check_alu_op(env, insn);
   6347 			if (err)
   6348 				return err;
   6349 
   6350 		} else if (class == BPF_LDX) {
   6351 			enum bpf_reg_type *prev_src_type, src_reg_type;
   6352 
   6353 			/* check for reserved fields is already done */
   6354 
   6355 			/* check src operand */
   6356 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
   6357 			if (err)
   6358 				return err;
   6359 
   6360 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
   6361 			if (err)
   6362 				return err;
   6363 
   6364 			src_reg_type = regs[insn->src_reg].type;
   6365 
   6366 			/* check that memory (src_reg + off) is readable,
   6367 			 * the state of dst_reg will be updated by this func
   6368 			 */
   6369 			err = check_mem_access(env, env->insn_idx, insn->src_reg,
   6370 					       insn->off, BPF_SIZE(insn->code),
   6371 					       BPF_READ, insn->dst_reg, false);
   6372 			if (err)
   6373 				return err;
   6374 
   6375 			prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
   6376 
   6377 			if (*prev_src_type == NOT_INIT) {
   6378 				/* saw a valid insn
   6379 				 * dst_reg = *(u32 *)(src_reg + off)
   6380 				 * save type to validate intersecting paths
   6381 				 */
   6382 				*prev_src_type = src_reg_type;
   6383 
   6384 			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
    6385 				/* A buggy or malicious program is trying to use the same insn
   6386 				 * dst_reg = *(u32*) (src_reg + off)
   6387 				 * with different pointer types:
   6388 				 * src_reg == ctx in one branch and
   6389 				 * src_reg == stack|map in some other branch.
   6390 				 * Reject it.
   6391 				 */
   6392 				verbose(env, "same insn cannot be used with different pointers\n");
   6393 				return -EINVAL;
   6394 			}
   6395 
   6396 		} else if (class == BPF_STX) {
   6397 			enum bpf_reg_type *prev_dst_type, dst_reg_type;
   6398 
   6399 			if (BPF_MODE(insn->code) == BPF_XADD) {
   6400 				err = check_xadd(env, env->insn_idx, insn);
   6401 				if (err)
   6402 					return err;
   6403 				env->insn_idx++;
   6404 				continue;
   6405 			}
   6406 
   6407 			/* check src1 operand */
   6408 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
   6409 			if (err)
   6410 				return err;
   6411 			/* check src2 operand */
   6412 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
   6413 			if (err)
   6414 				return err;
   6415 
   6416 			dst_reg_type = regs[insn->dst_reg].type;
   6417 
   6418 			/* check that memory (dst_reg + off) is writeable */
   6419 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
   6420 					       insn->off, BPF_SIZE(insn->code),
   6421 					       BPF_WRITE, insn->src_reg, false);
   6422 			if (err)
   6423 				return err;
   6424 
   6425 			prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
   6426 
   6427 			if (*prev_dst_type == NOT_INIT) {
   6428 				*prev_dst_type = dst_reg_type;
   6429 			} else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
   6430 				verbose(env, "same insn cannot be used with different pointers\n");
   6431 				return -EINVAL;
   6432 			}
   6433 
   6434 		} else if (class == BPF_ST) {
   6435 			if (BPF_MODE(insn->code) != BPF_MEM ||
   6436 			    insn->src_reg != BPF_REG_0) {
   6437 				verbose(env, "BPF_ST uses reserved fields\n");
   6438 				return -EINVAL;
   6439 			}
   6440 			/* check src operand */
   6441 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
   6442 			if (err)
   6443 				return err;
   6444 
   6445 			if (is_ctx_reg(env, insn->dst_reg)) {
   6446 				verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
   6447 					insn->dst_reg,
   6448 					reg_type_str[reg_state(env, insn->dst_reg)->type]);
   6449 				return -EACCES;
   6450 			}
   6451 
   6452 			/* check that memory (dst_reg + off) is writeable */
   6453 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
   6454 					       insn->off, BPF_SIZE(insn->code),
   6455 					       BPF_WRITE, -1, false);
   6456 			if (err)
   6457 				return err;
   6458 
   6459 		} else if (class == BPF_JMP || class == BPF_JMP32) {
   6460 			u8 opcode = BPF_OP(insn->code);
   6461 
   6462 			if (opcode == BPF_CALL) {
   6463 				if (BPF_SRC(insn->code) != BPF_K ||
   6464 				    insn->off != 0 ||
   6465 				    (insn->src_reg != BPF_REG_0 &&
   6466 				     insn->src_reg != BPF_PSEUDO_CALL) ||
   6467 				    insn->dst_reg != BPF_REG_0 ||
   6468 				    class == BPF_JMP32) {
   6469 					verbose(env, "BPF_CALL uses reserved fields\n");
   6470 					return -EINVAL;
   6471 				}
   6472 
   6473 				if (env->cur_state->active_spin_lock &&
   6474 				    (insn->src_reg == BPF_PSEUDO_CALL ||
   6475 				     insn->imm != BPF_FUNC_spin_unlock)) {
   6476 					verbose(env, "function calls are not allowed while holding a lock\n");
   6477 					return -EINVAL;
   6478 				}
   6479 				if (insn->src_reg == BPF_PSEUDO_CALL)
   6480 					err = check_func_call(env, insn, &env->insn_idx);
   6481 				else
   6482 					err = check_helper_call(env, insn->imm, env->insn_idx);
   6483 				if (err)
   6484 					return err;
   6485 
   6486 			} else if (opcode == BPF_JA) {
   6487 				if (BPF_SRC(insn->code) != BPF_K ||
   6488 				    insn->imm != 0 ||
   6489 				    insn->src_reg != BPF_REG_0 ||
   6490 				    insn->dst_reg != BPF_REG_0 ||
   6491 				    class == BPF_JMP32) {
   6492 					verbose(env, "BPF_JA uses reserved fields\n");
   6493 					return -EINVAL;
   6494 				}
   6495 
   6496 				env->insn_idx += insn->off + 1;
   6497 				continue;
   6498 
   6499 			} else if (opcode == BPF_EXIT) {
   6500 				if (BPF_SRC(insn->code) != BPF_K ||
   6501 				    insn->imm != 0 ||
   6502 				    insn->src_reg != BPF_REG_0 ||
   6503 				    insn->dst_reg != BPF_REG_0 ||
   6504 				    class == BPF_JMP32) {
   6505 					verbose(env, "BPF_EXIT uses reserved fields\n");
   6506 					return -EINVAL;
   6507 				}
   6508 
   6509 				if (env->cur_state->active_spin_lock) {
   6510 					verbose(env, "bpf_spin_unlock is missing\n");
   6511 					return -EINVAL;
   6512 				}
   6513 
   6514 				if (state->curframe) {
   6515 					/* exit from nested function */
   6516 					env->prev_insn_idx = env->insn_idx;
   6517 					err = prepare_func_exit(env, &env->insn_idx);
   6518 					if (err)
   6519 						return err;
   6520 					do_print_state = true;
   6521 					continue;
   6522 				}
   6523 
   6524 				err = check_reference_leak(env);
   6525 				if (err)
   6526 					return err;
   6527 
    6528 				/* The eBPF calling convention is such that R0 is used
    6529 				 * to return the value from the eBPF program.
    6530 				 * Make sure that it's readable at this point,
    6531 				 * i.e. at bpf_exit, which means the program wrote
    6532 				 * something into it earlier
    6533 				 */
   6534 				err = check_reg_arg(env, BPF_REG_0, SRC_OP);
   6535 				if (err)
   6536 					return err;
   6537 
   6538 				if (is_pointer_value(env, BPF_REG_0)) {
   6539 					verbose(env, "R0 leaks addr as return value\n");
   6540 					return -EACCES;
   6541 				}
   6542 
   6543 				err = check_return_code(env);
   6544 				if (err)
   6545 					return err;
   6546 process_bpf_exit:
   6547 				err = pop_stack(env, &env->prev_insn_idx,
   6548 						&env->insn_idx);
   6549 				if (err < 0) {
   6550 					if (err != -ENOENT)
   6551 						return err;
   6552 					break;
   6553 				} else {
   6554 					do_print_state = true;
   6555 					continue;
   6556 				}
   6557 			} else {
   6558 				err = check_cond_jmp_op(env, insn, &env->insn_idx);
   6559 				if (err)
   6560 					return err;
   6561 			}
   6562 		} else if (class == BPF_LD) {
   6563 			u8 mode = BPF_MODE(insn->code);
   6564 
   6565 			if (mode == BPF_ABS || mode == BPF_IND) {
   6566 				err = check_ld_abs(env, insn);
   6567 				if (err)
   6568 					return err;
   6569 
   6570 			} else if (mode == BPF_IMM) {
   6571 				err = check_ld_imm(env, insn);
   6572 				if (err)
   6573 					return err;
   6574 
   6575 				env->insn_idx++;
   6576 				env->insn_aux_data[env->insn_idx].seen = true;
   6577 			} else {
   6578 				verbose(env, "invalid BPF_LD mode\n");
   6579 				return -EINVAL;
   6580 			}
   6581 		} else {
   6582 			verbose(env, "unknown insn class %d\n", class);
   6583 			return -EINVAL;
   6584 		}
   6585 
   6586 		env->insn_idx++;
   6587 	}
   6588 
   6589 	verbose(env, "processed %d insns (limit %d), stack depth ",
   6590 		insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
   6591 	for (i = 0; i < env->subprog_cnt; i++) {
   6592 		u32 depth = env->subprog_info[i].stack_depth;
   6593 
   6594 		verbose(env, "%d", depth);
   6595 		if (i + 1 < env->subprog_cnt)
   6596 			verbose(env, "+");
   6597 	}
   6598 	verbose(env, "\n");
   6599 	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
   6600 	return 0;
   6601 }
   6602 
   6603 static int check_map_prealloc(struct bpf_map *map)
   6604 {
   6605 	return (map->map_type != BPF_MAP_TYPE_HASH &&
   6606 		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
   6607 		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
   6608 		!(map->map_flags & BPF_F_NO_PREALLOC);
   6609 }
   6610 
   6611 static bool is_tracing_prog_type(enum bpf_prog_type type)
   6612 {
   6613 	switch (type) {
   6614 	case BPF_PROG_TYPE_KPROBE:
   6615 	case BPF_PROG_TYPE_TRACEPOINT:
   6616 	case BPF_PROG_TYPE_PERF_EVENT:
   6617 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
   6618 		return true;
   6619 	default:
   6620 		return false;
   6621 	}
   6622 }
   6623 
   6624 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
   6625 					struct bpf_map *map,
   6626 					struct bpf_prog *prog)
   6627 
   6628 {
   6629 	/* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
   6630 	 * preallocated hash maps, since doing memory allocation
   6631 	 * in overflow_handler can crash depending on where nmi got
   6632 	 * triggered.
   6633 	 */
   6634 	if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
   6635 		if (!check_map_prealloc(map)) {
   6636 			verbose(env, "perf_event programs can only use preallocated hash map\n");
   6637 			return -EINVAL;
   6638 		}
   6639 		if (map->inner_map_meta &&
   6640 		    !check_map_prealloc(map->inner_map_meta)) {
   6641 			verbose(env, "perf_event programs can only use preallocated inner hash map\n");
   6642 			return -EINVAL;
   6643 		}
   6644 	}
   6645 
   6646 	if ((is_tracing_prog_type(prog->type) ||
   6647 	     prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
   6648 	    map_value_has_spin_lock(map)) {
   6649 		verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
   6650 		return -EINVAL;
   6651 	}
   6652 
   6653 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
   6654 	    !bpf_offload_prog_map_match(prog, map)) {
   6655 		verbose(env, "offload device mismatch between prog and map\n");
   6656 		return -EINVAL;
   6657 	}
   6658 
   6659 	return 0;
   6660 }
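
/* For illustration only: a hypothetical BPF_PROG_TYPE_PERF_EVENT program that
 * references a map created with
 *
 *    map_type  = BPF_MAP_TYPE_HASH
 *    map_flags = BPF_F_NO_PREALLOC
 *
 * fails check_map_prealloc() and is rejected above with "perf_event programs
 * can only use preallocated hash map", since element allocation could
 * otherwise happen from NMI context.
 */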
   6661 
   6662 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
   6663 {
   6664 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
   6665 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
   6666 }
   6667 
   6668 /* look for pseudo eBPF instructions that access map FDs and
   6669  * replace them with actual map pointers
   6670  */
   6671 static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
   6672 {
   6673 	struct bpf_insn *insn = env->prog->insnsi;
   6674 	int insn_cnt = env->prog->len;
   6675 	int i, j, err;
   6676 
   6677 	err = bpf_prog_calc_tag(env->prog);
   6678 	if (err)
   6679 		return err;
   6680 
   6681 	for (i = 0; i < insn_cnt; i++, insn++) {
   6682 		if (BPF_CLASS(insn->code) == BPF_LDX &&
   6683 		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
   6684 			verbose(env, "BPF_LDX uses reserved fields\n");
   6685 			return -EINVAL;
   6686 		}
   6687 
   6688 		if (BPF_CLASS(insn->code) == BPF_STX &&
   6689 		    ((BPF_MODE(insn->code) != BPF_MEM &&
   6690 		      BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
   6691 			verbose(env, "BPF_STX uses reserved fields\n");
   6692 			return -EINVAL;
   6693 		}
   6694 
   6695 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
   6696 			struct bpf_map *map;
   6697 			struct fd f;
   6698 
   6699 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
   6700 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
   6701 			    insn[1].off != 0) {
   6702 				verbose(env, "invalid bpf_ld_imm64 insn\n");
   6703 				return -EINVAL;
   6704 			}
   6705 
   6706 			if (insn->src_reg == 0)
   6707 				/* valid generic load 64-bit imm */
   6708 				goto next_insn;
   6709 
   6710 			if (insn[0].src_reg != BPF_PSEUDO_MAP_FD ||
   6711 			    insn[1].imm != 0) {
   6712 				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
   6713 				return -EINVAL;
   6714 			}
   6715 
   6716 			f = fdget(insn[0].imm);
   6717 			map = __bpf_map_get(f);
   6718 			if (IS_ERR(map)) {
   6719 				verbose(env, "fd %d is not pointing to valid bpf_map\n",
   6720 					insn[0].imm);
   6721 				return PTR_ERR(map);
   6722 			}
   6723 
   6724 			err = check_map_prog_compatibility(env, map, env->prog);
   6725 			if (err) {
   6726 				fdput(f);
   6727 				return err;
   6728 			}
   6729 
   6730 			/* store map pointer inside BPF_LD_IMM64 instruction */
   6731 			insn[0].imm = (u32) (unsigned long) map;
   6732 			insn[1].imm = ((u64) (unsigned long) map) >> 32;
   6733 
   6734 			/* check whether we recorded this map already */
   6735 			for (j = 0; j < env->used_map_cnt; j++)
   6736 				if (env->used_maps[j] == map) {
   6737 					fdput(f);
   6738 					goto next_insn;
   6739 				}
   6740 
   6741 			if (env->used_map_cnt >= MAX_USED_MAPS) {
   6742 				fdput(f);
   6743 				return -E2BIG;
   6744 			}
   6745 
   6746 			/* hold the map. If the program is rejected by verifier,
   6747 			 * the map will be released by release_maps() or it
   6748 			 * will be used by the valid program until it's unloaded
   6749 			 * and all maps are released in free_used_maps()
   6750 			 */
   6751 			map = bpf_map_inc(map, false);
   6752 			if (IS_ERR(map)) {
   6753 				fdput(f);
   6754 				return PTR_ERR(map);
   6755 			}
   6756 			env->used_maps[env->used_map_cnt++] = map;
   6757 
   6758 			if (bpf_map_is_cgroup_storage(map) &&
   6759 			    bpf_cgroup_storage_assign(env->prog, map)) {
   6760 				verbose(env, "only one cgroup storage of each type is allowed\n");
   6761 				fdput(f);
   6762 				return -EBUSY;
   6763 			}
   6764 
   6765 			fdput(f);
   6766 next_insn:
   6767 			insn++;
   6768 			i++;
   6769 			continue;
   6770 		}
   6771 
   6772 		/* Basic sanity check before we invest more work here. */
   6773 		if (!bpf_opcode_in_insntable(insn->code)) {
   6774 			verbose(env, "unknown opcode %02x\n", insn->code);
   6775 			return -EINVAL;
   6776 		}
   6777 	}
   6778 
   6779 	/* now all pseudo BPF_LD_IMM64 instructions load valid
   6780 	 * 'struct bpf_map *' into a register instead of user map_fd.
   6781 	 * These pointers will be used later by verifier to validate map access.
   6782 	 */
   6783 	return 0;
   6784 }
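
/* For illustration only: a loader emits the pseudo form as a two-slot ld_imm64
 * whose first half carries the user-visible fd, e.g. using the
 * BPF_LD_IMM64_RAW() helper from linux/filter.h with a hypothetical 'map_fd'
 * returned by BPF_MAP_CREATE:
 *
 *    BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_FD, map_fd),
 *
 * After replace_map_fd_with_map_ptr() the two imm halves hold the low and high
 * 32 bits of the kernel 'struct bpf_map *', and src_reg is cleared later by
 * convert_pseudo_ld_imm64() so the JIT/interpreter sees a plain ld_imm64.
 */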
   6785 
   6786 /* drop refcnt of maps used by the rejected program */
   6787 static void release_maps(struct bpf_verifier_env *env)
   6788 {
   6789 	enum bpf_cgroup_storage_type stype;
   6790 	int i;
   6791 
   6792 	for_each_cgroup_storage_type(stype) {
   6793 		if (!env->prog->aux->cgroup_storage[stype])
   6794 			continue;
   6795 		bpf_cgroup_storage_release(env->prog,
   6796 			env->prog->aux->cgroup_storage[stype]);
   6797 	}
   6798 
   6799 	for (i = 0; i < env->used_map_cnt; i++)
   6800 		bpf_map_put(env->used_maps[i]);
   6801 }
   6802 
   6803 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
   6804 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
   6805 {
   6806 	struct bpf_insn *insn = env->prog->insnsi;
   6807 	int insn_cnt = env->prog->len;
   6808 	int i;
   6809 
   6810 	for (i = 0; i < insn_cnt; i++, insn++)
   6811 		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
   6812 			insn->src_reg = 0;
   6813 }
   6814 
    6815 /* single env->prog->insnsi[off] instruction was replaced with the range
    6816  * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
   6817  * [0, off) and [off, end) to new locations, so the patched range stays zero
   6818  */
   6819 static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
   6820 				u32 off, u32 cnt)
   6821 {
   6822 	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
   6823 	int i;
   6824 
   6825 	if (cnt == 1)
   6826 		return 0;
   6827 	new_data = vzalloc(array_size(prog_len,
   6828 				      sizeof(struct bpf_insn_aux_data)));
   6829 	if (!new_data)
   6830 		return -ENOMEM;
   6831 	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
   6832 	memcpy(new_data + off + cnt - 1, old_data + off,
   6833 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
   6834 	for (i = off; i < off + cnt - 1; i++)
   6835 		new_data[i].seen = true;
   6836 	env->insn_aux_data = new_data;
   6837 	vfree(old_data);
   6838 	return 0;
   6839 }
   6840 
   6841 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
   6842 {
   6843 	int i;
   6844 
   6845 	if (len == 1)
   6846 		return;
   6847 	/* NOTE: fake 'exit' subprog should be updated as well. */
   6848 	for (i = 0; i <= env->subprog_cnt; i++) {
   6849 		if (env->subprog_info[i].start <= off)
   6850 			continue;
   6851 		env->subprog_info[i].start += len - 1;
   6852 	}
   6853 }
   6854 
   6855 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
   6856 					    const struct bpf_insn *patch, u32 len)
   6857 {
   6858 	struct bpf_prog *new_prog;
   6859 
   6860 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
   6861 	if (!new_prog)
   6862 		return NULL;
   6863 	if (adjust_insn_aux_data(env, new_prog->len, off, len))
   6864 		return NULL;
   6865 	adjust_subprog_starts(env, off, len);
   6866 	return new_prog;
   6867 }
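
/* For illustration only: replacing one insn at 'off' with a cnt == 3 patch
 * grows the program by 2, so subprog starts above 'off' move by +2 and the
 * per-insn aux data is spread out the same way.  Callers therefore track the
 * accumulated growth, following the pattern used by the rewrite passes below:
 *
 *    new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
 *    if (!new_prog)
 *            return -ENOMEM;
 *    delta    += cnt - 1;
 *    env->prog = new_prog;
 *    insn      = new_prog->insnsi + i + delta;
 */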
   6868 
   6869 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
   6870 					      u32 off, u32 cnt)
   6871 {
   6872 	int i, j;
   6873 
   6874 	/* find first prog starting at or after off (first to remove) */
   6875 	for (i = 0; i < env->subprog_cnt; i++)
   6876 		if (env->subprog_info[i].start >= off)
   6877 			break;
   6878 	/* find first prog starting at or after off + cnt (first to stay) */
   6879 	for (j = i; j < env->subprog_cnt; j++)
   6880 		if (env->subprog_info[j].start >= off + cnt)
   6881 			break;
   6882 	/* if j doesn't start exactly at off + cnt, we are just removing
   6883 	 * the front of previous prog
   6884 	 */
   6885 	if (env->subprog_info[j].start != off + cnt)
   6886 		j--;
   6887 
   6888 	if (j > i) {
   6889 		struct bpf_prog_aux *aux = env->prog->aux;
   6890 		int move;
   6891 
   6892 		/* move fake 'exit' subprog as well */
   6893 		move = env->subprog_cnt + 1 - j;
   6894 
   6895 		memmove(env->subprog_info + i,
   6896 			env->subprog_info + j,
   6897 			sizeof(*env->subprog_info) * move);
   6898 		env->subprog_cnt -= j - i;
   6899 
   6900 		/* remove func_info */
   6901 		if (aux->func_info) {
   6902 			move = aux->func_info_cnt - j;
   6903 
   6904 			memmove(aux->func_info + i,
   6905 				aux->func_info + j,
   6906 				sizeof(*aux->func_info) * move);
   6907 			aux->func_info_cnt -= j - i;
   6908 			/* func_info->insn_off is set after all code rewrites,
   6909 			 * in adjust_btf_func() - no need to adjust
   6910 			 */
   6911 		}
   6912 	} else {
   6913 		/* convert i from "first prog to remove" to "first to adjust" */
   6914 		if (env->subprog_info[i].start == off)
   6915 			i++;
   6916 	}
   6917 
   6918 	/* update fake 'exit' subprog as well */
   6919 	for (; i <= env->subprog_cnt; i++)
   6920 		env->subprog_info[i].start -= cnt;
   6921 
   6922 	return 0;
   6923 }
   6924 
   6925 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
   6926 				      u32 cnt)
   6927 {
   6928 	struct bpf_prog *prog = env->prog;
   6929 	u32 i, l_off, l_cnt, nr_linfo;
   6930 	struct bpf_line_info *linfo;
   6931 
   6932 	nr_linfo = prog->aux->nr_linfo;
   6933 	if (!nr_linfo)
   6934 		return 0;
   6935 
   6936 	linfo = prog->aux->linfo;
   6937 
   6938 	/* find first line info to remove, count lines to be removed */
   6939 	for (i = 0; i < nr_linfo; i++)
   6940 		if (linfo[i].insn_off >= off)
   6941 			break;
   6942 
   6943 	l_off = i;
   6944 	l_cnt = 0;
   6945 	for (; i < nr_linfo; i++)
   6946 		if (linfo[i].insn_off < off + cnt)
   6947 			l_cnt++;
   6948 		else
   6949 			break;
   6950 
    6951 	/* If the first live insn doesn't match the first live linfo, it needs to
    6952 	 * "inherit" the last removed linfo.  prog is already modified, so
    6953 	 * prog->len == off means no live instructions remain after it (the tail was removed).
   6954 	 */
   6955 	if (prog->len != off && l_cnt &&
   6956 	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
   6957 		l_cnt--;
   6958 		linfo[--i].insn_off = off + cnt;
   6959 	}
   6960 
   6961 	/* remove the line info which refer to the removed instructions */
   6962 	if (l_cnt) {
   6963 		memmove(linfo + l_off, linfo + i,
   6964 			sizeof(*linfo) * (nr_linfo - i));
   6965 
   6966 		prog->aux->nr_linfo -= l_cnt;
   6967 		nr_linfo = prog->aux->nr_linfo;
   6968 	}
   6969 
   6970 	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
   6971 	for (i = l_off; i < nr_linfo; i++)
   6972 		linfo[i].insn_off -= cnt;
   6973 
   6974 	/* fix up all subprogs (incl. 'exit') which start >= off */
   6975 	for (i = 0; i <= env->subprog_cnt; i++)
   6976 		if (env->subprog_info[i].linfo_idx > l_off) {
   6977 			/* program may have started in the removed region but
   6978 			 * may not be fully removed
   6979 			 */
   6980 			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
   6981 				env->subprog_info[i].linfo_idx -= l_cnt;
   6982 			else
   6983 				env->subprog_info[i].linfo_idx = l_off;
   6984 		}
   6985 
   6986 	return 0;
   6987 }
   6988 
   6989 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
   6990 {
   6991 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
   6992 	unsigned int orig_prog_len = env->prog->len;
   6993 	int err;
   6994 
   6995 	if (bpf_prog_is_dev_bound(env->prog->aux))
   6996 		bpf_prog_offload_remove_insns(env, off, cnt);
   6997 
   6998 	err = bpf_remove_insns(env->prog, off, cnt);
   6999 	if (err)
   7000 		return err;
   7001 
   7002 	err = adjust_subprog_starts_after_remove(env, off, cnt);
   7003 	if (err)
   7004 		return err;
   7005 
   7006 	err = bpf_adj_linfo_after_remove(env, off, cnt);
   7007 	if (err)
   7008 		return err;
   7009 
   7010 	memmove(aux_data + off,	aux_data + off + cnt,
   7011 		sizeof(*aux_data) * (orig_prog_len - off - cnt));
   7012 
   7013 	return 0;
   7014 }
   7015 
   7016 /* The verifier does more data flow analysis than llvm and will not
   7017  * explore branches that are dead at run time. Malicious programs can
   7018  * have dead code too. Therefore replace all dead at-run-time code
   7019  * with 'ja -1'.
   7020  *
    7021  * Plain nops would not be optimal: if they sat at the end of the
    7022  * program and another bug made us jump there, we would execute
    7023  * beyond program memory. Returning an exception code also wouldn't
    7024  * work, since we can have subprogs where the dead code could be
    7025  * located.
   7026  */
   7027 static void sanitize_dead_code(struct bpf_verifier_env *env)
   7028 {
   7029 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
   7030 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
   7031 	struct bpf_insn *insn = env->prog->insnsi;
   7032 	const int insn_cnt = env->prog->len;
   7033 	int i;
   7034 
   7035 	for (i = 0; i < insn_cnt; i++) {
   7036 		if (aux_data[i].seen)
   7037 			continue;
   7038 		memcpy(insn + i, &trap, sizeof(trap));
   7039 	}
   7040 }
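
/* For illustration only, a hypothetical fragment where the fall-through of a
 * conditional jump is reachable in the CFG but proven dead at run time:
 *
 *    BPF_MOV64_IMM(BPF_REG_2, 0),
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),    (always taken: r2 is known 0)
 *    BPF_MOV64_IMM(BPF_REG_0, 1),              (never marked 'seen')
 *    BPF_MOV64_IMM(BPF_REG_0, 0),
 *    BPF_EXIT_INSN(),
 *
 * sanitize_dead_code() overwrites the unseen insn with the
 * BPF_JMP_IMM(BPF_JA, 0, 0, -1) trap, which branches back to itself, so a
 * stray jump into it cannot run past the end of the program.
 */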
   7041 
   7042 static bool insn_is_cond_jump(u8 code)
   7043 {
   7044 	u8 op;
   7045 
   7046 	if (BPF_CLASS(code) == BPF_JMP32)
   7047 		return true;
   7048 
   7049 	if (BPF_CLASS(code) != BPF_JMP)
   7050 		return false;
   7051 
   7052 	op = BPF_OP(code);
   7053 	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
   7054 }
   7055 
   7056 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
   7057 {
   7058 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
   7059 	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
   7060 	struct bpf_insn *insn = env->prog->insnsi;
   7061 	const int insn_cnt = env->prog->len;
   7062 	int i;
   7063 
   7064 	for (i = 0; i < insn_cnt; i++, insn++) {
   7065 		if (!insn_is_cond_jump(insn->code))
   7066 			continue;
   7067 
   7068 		if (!aux_data[i + 1].seen)
   7069 			ja.off = insn->off;
   7070 		else if (!aux_data[i + 1 + insn->off].seen)
   7071 			ja.off = 0;
   7072 		else
   7073 			continue;
   7074 
   7075 		if (bpf_prog_is_dev_bound(env->prog->aux))
   7076 			bpf_prog_offload_replace_insn(env, i, &ja);
   7077 
   7078 		memcpy(insn, &ja, sizeof(ja));
   7079 	}
   7080 }
   7081 
   7082 static int opt_remove_dead_code(struct bpf_verifier_env *env)
   7083 {
   7084 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
   7085 	int insn_cnt = env->prog->len;
   7086 	int i, err;
   7087 
   7088 	for (i = 0; i < insn_cnt; i++) {
   7089 		int j;
   7090 
   7091 		j = 0;
   7092 		while (i + j < insn_cnt && !aux_data[i + j].seen)
   7093 			j++;
   7094 		if (!j)
   7095 			continue;
   7096 
   7097 		err = verifier_remove_insns(env, i, j);
   7098 		if (err)
   7099 			return err;
   7100 		insn_cnt = env->prog->len;
   7101 	}
   7102 
   7103 	return 0;
   7104 }
   7105 
   7106 static int opt_remove_nops(struct bpf_verifier_env *env)
   7107 {
   7108 	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
   7109 	struct bpf_insn *insn = env->prog->insnsi;
   7110 	int insn_cnt = env->prog->len;
   7111 	int i, err;
   7112 
   7113 	for (i = 0; i < insn_cnt; i++) {
   7114 		if (memcmp(&insn[i], &ja, sizeof(ja)))
   7115 			continue;
   7116 
   7117 		err = verifier_remove_insns(env, i, 1);
   7118 		if (err)
   7119 			return err;
   7120 		insn_cnt--;
   7121 		i--;
   7122 	}
   7123 
   7124 	return 0;
   7125 }
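
/* For illustration only: combined effect of the optimizations above on a
 * conditional jump with one dead arm.  If the fall-through insn was never
 * visited, opt_hard_wire_dead_code_branches() rewrites the jump into an
 * unconditional one with the same offset:
 *
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 3)  ->  BPF_JMP_IMM(BPF_JA, 0, 0, 3)
 *
 * If instead the jump target was dead, the insn becomes a 'ja +0' no-op,
 * which opt_remove_nops() then deletes entirely.
 */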
   7126 
   7127 /* convert load instructions that access fields of a context type into a
   7128  * sequence of instructions that access fields of the underlying structure:
   7129  *     struct __sk_buff    -> struct sk_buff
   7130  *     struct bpf_sock_ops -> struct sock
   7131  */
   7132 static int convert_ctx_accesses(struct bpf_verifier_env *env)
   7133 {
   7134 	const struct bpf_verifier_ops *ops = env->ops;
   7135 	int i, cnt, size, ctx_field_size, delta = 0;
   7136 	const int insn_cnt = env->prog->len;
   7137 	struct bpf_insn insn_buf[16], *insn;
   7138 	u32 target_size, size_default, off;
   7139 	struct bpf_prog *new_prog;
   7140 	enum bpf_access_type type;
   7141 	bool is_narrower_load;
   7142 
   7143 	if (ops->gen_prologue || env->seen_direct_write) {
   7144 		if (!ops->gen_prologue) {
   7145 			verbose(env, "bpf verifier is misconfigured\n");
   7146 			return -EINVAL;
   7147 		}
   7148 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
   7149 					env->prog);
   7150 		if (cnt >= ARRAY_SIZE(insn_buf)) {
   7151 			verbose(env, "bpf verifier is misconfigured\n");
   7152 			return -EINVAL;
   7153 		} else if (cnt) {
   7154 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
   7155 			if (!new_prog)
   7156 				return -ENOMEM;
   7157 
   7158 			env->prog = new_prog;
   7159 			delta += cnt - 1;
   7160 		}
   7161 	}
   7162 
   7163 	if (bpf_prog_is_dev_bound(env->prog->aux))
   7164 		return 0;
   7165 
   7166 	insn = env->prog->insnsi + delta;
   7167 
   7168 	for (i = 0; i < insn_cnt; i++, insn++) {
   7169 		bpf_convert_ctx_access_t convert_ctx_access;
   7170 
   7171 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
   7172 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
   7173 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
   7174 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
   7175 			type = BPF_READ;
   7176 		else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
   7177 			 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
   7178 			 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
   7179 			 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
   7180 			type = BPF_WRITE;
   7181 		else
   7182 			continue;
   7183 
   7184 		if (type == BPF_WRITE &&
   7185 		    env->insn_aux_data[i + delta].sanitize_stack_off) {
   7186 			struct bpf_insn patch[] = {
   7187 				/* Sanitize suspicious stack slot with zero.
   7188 				 * There are no memory dependencies for this store,
   7189 				 * since it's only using frame pointer and immediate
   7190 				 * constant of zero
   7191 				 */
   7192 				BPF_ST_MEM(BPF_DW, BPF_REG_FP,
   7193 					   env->insn_aux_data[i + delta].sanitize_stack_off,
   7194 					   0),
   7195 				/* the original STX instruction will immediately
   7196 				 * overwrite the same stack slot with appropriate value
   7197 				 */
   7198 				*insn,
   7199 			};
   7200 
   7201 			cnt = ARRAY_SIZE(patch);
   7202 			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
   7203 			if (!new_prog)
   7204 				return -ENOMEM;
   7205 
   7206 			delta    += cnt - 1;
   7207 			env->prog = new_prog;
   7208 			insn      = new_prog->insnsi + i + delta;
   7209 			continue;
   7210 		}
   7211 
   7212 		switch (env->insn_aux_data[i + delta].ptr_type) {
   7213 		case PTR_TO_CTX:
   7214 			if (!ops->convert_ctx_access)
   7215 				continue;
   7216 			convert_ctx_access = ops->convert_ctx_access;
   7217 			break;
   7218 		case PTR_TO_SOCKET:
   7219 		case PTR_TO_SOCK_COMMON:
   7220 			convert_ctx_access = bpf_sock_convert_ctx_access;
   7221 			break;
   7222 		case PTR_TO_TCP_SOCK:
   7223 			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
   7224 			break;
   7225 		default:
   7226 			continue;
   7227 		}
   7228 
   7229 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
   7230 		size = BPF_LDST_BYTES(insn);
   7231 
   7232 		/* If the read access is a narrower load of the field,
    7233 		 * convert to a 4/8-byte load, to minimize program type specific
    7234 		 * convert_ctx_access changes. If the conversion is successful,
    7235 		 * we will apply the proper mask to the result.
   7236 		 */
   7237 		is_narrower_load = size < ctx_field_size;
   7238 		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
   7239 		off = insn->off;
   7240 		if (is_narrower_load) {
   7241 			u8 size_code;
   7242 
   7243 			if (type == BPF_WRITE) {
   7244 				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
   7245 				return -EINVAL;
   7246 			}
   7247 
   7248 			size_code = BPF_H;
   7249 			if (ctx_field_size == 4)
   7250 				size_code = BPF_W;
   7251 			else if (ctx_field_size == 8)
   7252 				size_code = BPF_DW;
   7253 
   7254 			insn->off = off & ~(size_default - 1);
   7255 			insn->code = BPF_LDX | BPF_MEM | size_code;
   7256 		}
   7257 
   7258 		target_size = 0;
   7259 		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
   7260 					 &target_size);
   7261 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
   7262 		    (ctx_field_size && !target_size)) {
   7263 			verbose(env, "bpf verifier is misconfigured\n");
   7264 			return -EINVAL;
   7265 		}
   7266 
   7267 		if (is_narrower_load && size < target_size) {
   7268 			u8 shift = (off & (size_default - 1)) * 8;
   7269 
   7270 			if (ctx_field_size <= 4) {
   7271 				if (shift)
   7272 					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
   7273 									insn->dst_reg,
   7274 									shift);
   7275 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
   7276 								(1 << size * 8) - 1);
   7277 			} else {
   7278 				if (shift)
   7279 					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
   7280 									insn->dst_reg,
   7281 									shift);
   7282 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
   7283 								(1 << size * 8) - 1);
   7284 			}
   7285 		}
   7286 
   7287 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
   7288 		if (!new_prog)
   7289 			return -ENOMEM;
   7290 
   7291 		delta += cnt - 1;
   7292 
   7293 		/* keep walking new program and skip insns we just inserted */
   7294 		env->prog = new_prog;
   7295 		insn      = new_prog->insnsi + i + delta;
   7296 	}
   7297 
   7298 	return 0;
   7299 }
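
/* For illustration only: for a socket filter, a ctx load such as
 *
 *    BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, len)),
 *
 * is rewritten by the program type's convert_ctx_access callback into an
 * equivalent load from the real 'struct sk_buff' layout (roughly
 * offsetof(struct sk_buff, len)), so the UAPI view of the context never has
 * to exist in memory at run time.
 */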
   7300 
   7301 static int jit_subprogs(struct bpf_verifier_env *env)
   7302 {
   7303 	struct bpf_prog *prog = env->prog, **func, *tmp;
   7304 	int i, j, subprog_start, subprog_end = 0, len, subprog;
   7305 	struct bpf_insn *insn;
   7306 	void *old_bpf_func;
   7307 	int err;
   7308 
   7309 	if (env->subprog_cnt <= 1)
   7310 		return 0;
   7311 
   7312 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
   7313 		if (insn->code != (BPF_JMP | BPF_CALL) ||
   7314 		    insn->src_reg != BPF_PSEUDO_CALL)
   7315 			continue;
   7316 		/* Upon error here we cannot fall back to interpreter but
   7317 		 * need a hard reject of the program. Thus -EFAULT is
   7318 		 * propagated in any case.
   7319 		 */
   7320 		subprog = find_subprog(env, i + insn->imm + 1);
   7321 		if (subprog < 0) {
   7322 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
   7323 				  i + insn->imm + 1);
   7324 			return -EFAULT;
   7325 		}
   7326 		/* temporarily remember subprog id inside insn instead of
   7327 		 * aux_data, since next loop will split up all insns into funcs
   7328 		 */
   7329 		insn->off = subprog;
   7330 		/* remember original imm in case JIT fails and fallback
   7331 		 * to interpreter will be needed
   7332 		 */
   7333 		env->insn_aux_data[i].call_imm = insn->imm;
    7334 		/* point imm to __bpf_call_base+1 from the JIT's point of view */
   7335 		insn->imm = 1;
   7336 	}
   7337 
   7338 	err = bpf_prog_alloc_jited_linfo(prog);
   7339 	if (err)
   7340 		goto out_undo_insn;
   7341 
   7342 	err = -ENOMEM;
   7343 	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
   7344 	if (!func)
   7345 		goto out_undo_insn;
   7346 
   7347 	for (i = 0; i < env->subprog_cnt; i++) {
   7348 		subprog_start = subprog_end;
   7349 		subprog_end = env->subprog_info[i + 1].start;
   7350 
   7351 		len = subprog_end - subprog_start;
   7352 		/* BPF_PROG_RUN doesn't call subprogs directly,
   7353 		 * hence main prog stats include the runtime of subprogs.
    7354 		 * subprogs don't have IDs and are not reachable via prog_get_next_id;
   7355 		 * func[i]->aux->stats will never be accessed and stays NULL
   7356 		 */
   7357 		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
   7358 		if (!func[i])
   7359 			goto out_free;
   7360 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
   7361 		       len * sizeof(struct bpf_insn));
   7362 		func[i]->type = prog->type;
   7363 		func[i]->len = len;
   7364 		if (bpf_prog_calc_tag(func[i]))
   7365 			goto out_free;
   7366 		func[i]->is_func = 1;
   7367 		func[i]->aux->func_idx = i;
   7368 		/* the btf and func_info will be freed only at prog->aux */
   7369 		func[i]->aux->btf = prog->aux->btf;
   7370 		func[i]->aux->func_info = prog->aux->func_info;
   7371 
   7372 		/* Use bpf_prog_F_tag to indicate functions in stack traces.
    7373 		 * Long term, we would need debug info to populate names
   7374 		 */
   7375 		func[i]->aux->name[0] = 'F';
   7376 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
   7377 		func[i]->jit_requested = 1;
   7378 		func[i]->aux->linfo = prog->aux->linfo;
   7379 		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
   7380 		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
   7381 		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
   7382 		func[i] = bpf_int_jit_compile(func[i]);
   7383 		if (!func[i]->jited) {
   7384 			err = -ENOTSUPP;
   7385 			goto out_free;
   7386 		}
   7387 		cond_resched();
   7388 	}
   7389 	/* at this point all bpf functions were successfully JITed
   7390 	 * now populate all bpf_calls with correct addresses and
   7391 	 * run last pass of JIT
   7392 	 */
   7393 	for (i = 0; i < env->subprog_cnt; i++) {
   7394 		insn = func[i]->insnsi;
   7395 		for (j = 0; j < func[i]->len; j++, insn++) {
   7396 			if (insn->code != (BPF_JMP | BPF_CALL) ||
   7397 			    insn->src_reg != BPF_PSEUDO_CALL)
   7398 				continue;
   7399 			subprog = insn->off;
   7400 			insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
   7401 				func[subprog]->bpf_func -
   7402 				__bpf_call_base;
   7403 		}
   7404 
   7405 		/* we use the aux data to keep a list of the start addresses
   7406 		 * of the JITed images for each function in the program
   7407 		 *
   7408 		 * for some architectures, such as powerpc64, the imm field
   7409 		 * might not be large enough to hold the offset of the start
   7410 		 * address of the callee's JITed image from __bpf_call_base
   7411 		 *
   7412 		 * in such cases, we can lookup the start address of a callee
   7413 		 * by using its subprog id, available from the off field of
   7414 		 * the call instruction, as an index for this list
   7415 		 */
   7416 		func[i]->aux->func = func;
   7417 		func[i]->aux->func_cnt = env->subprog_cnt;
   7418 	}
   7419 	for (i = 0; i < env->subprog_cnt; i++) {
   7420 		old_bpf_func = func[i]->bpf_func;
   7421 		tmp = bpf_int_jit_compile(func[i]);
   7422 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
   7423 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
   7424 			err = -ENOTSUPP;
   7425 			goto out_free;
   7426 		}
   7427 		cond_resched();
   7428 	}
   7429 
   7430 	/* finally lock prog and jit images for all functions and
    7431 	 * populate kallsyms
   7432 	 */
   7433 	for (i = 0; i < env->subprog_cnt; i++) {
   7434 		bpf_prog_lock_ro(func[i]);
   7435 		bpf_prog_kallsyms_add(func[i]);
   7436 	}
   7437 
   7438 	/* Last step: make now unused interpreter insns from main
   7439 	 * prog consistent for later dump requests, so they can
   7440 	 * later look the same as if they were interpreted only.
   7441 	 */
   7442 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
   7443 		if (insn->code != (BPF_JMP | BPF_CALL) ||
   7444 		    insn->src_reg != BPF_PSEUDO_CALL)
   7445 			continue;
   7446 		insn->off = env->insn_aux_data[i].call_imm;
   7447 		subprog = find_subprog(env, i + insn->off + 1);
   7448 		insn->imm = subprog;
   7449 	}
   7450 
   7451 	prog->jited = 1;
   7452 	prog->bpf_func = func[0]->bpf_func;
   7453 	prog->aux->func = func;
   7454 	prog->aux->func_cnt = env->subprog_cnt;
   7455 	bpf_prog_free_unused_jited_linfo(prog);
   7456 	return 0;
   7457 out_free:
   7458 	for (i = 0; i < env->subprog_cnt; i++)
   7459 		if (func[i])
   7460 			bpf_jit_free(func[i]);
   7461 	kfree(func);
   7462 out_undo_insn:
   7463 	/* cleanup main prog to be interpreted */
   7464 	prog->jit_requested = 0;
   7465 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
   7466 		if (insn->code != (BPF_JMP | BPF_CALL) ||
   7467 		    insn->src_reg != BPF_PSEUDO_CALL)
   7468 			continue;
   7469 		insn->off = 0;
   7470 		insn->imm = env->insn_aux_data[i].call_imm;
   7471 	}
   7472 	bpf_prog_free_jited_linfo(prog);
   7473 	return err;
   7474 }
   7475 
   7476 static int fixup_call_args(struct bpf_verifier_env *env)
   7477 {
   7478 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
   7479 	struct bpf_prog *prog = env->prog;
   7480 	struct bpf_insn *insn = prog->insnsi;
   7481 	int i, depth;
   7482 #endif
   7483 	int err = 0;
   7484 
   7485 	if (env->prog->jit_requested &&
   7486 	    !bpf_prog_is_dev_bound(env->prog->aux)) {
   7487 		err = jit_subprogs(env);
   7488 		if (err == 0)
   7489 			return 0;
   7490 		if (err == -EFAULT)
   7491 			return err;
   7492 	}
   7493 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
   7494 	for (i = 0; i < prog->len; i++, insn++) {
   7495 		if (insn->code != (BPF_JMP | BPF_CALL) ||
   7496 		    insn->src_reg != BPF_PSEUDO_CALL)
   7497 			continue;
   7498 		depth = get_callee_stack_depth(env, insn, i);
   7499 		if (depth < 0)
   7500 			return depth;
   7501 		bpf_patch_call_args(insn, depth);
   7502 	}
   7503 	err = 0;
   7504 #endif
   7505 	return err;
   7506 }
   7507 
   7508 /* fixup insn->imm field of bpf_call instructions
   7509  * and inline eligible helpers as explicit sequence of BPF instructions
   7510  *
   7511  * this function is called after eBPF program passed verification
   7512  */
   7513 static int fixup_bpf_calls(struct bpf_verifier_env *env)
   7514 {
   7515 	struct bpf_prog *prog = env->prog;
   7516 	struct bpf_insn *insn = prog->insnsi;
   7517 	const struct bpf_func_proto *fn;
   7518 	const int insn_cnt = prog->len;
   7519 	const struct bpf_map_ops *ops;
   7520 	struct bpf_insn_aux_data *aux;
   7521 	struct bpf_insn insn_buf[16];
   7522 	struct bpf_prog *new_prog;
   7523 	struct bpf_map *map_ptr;
   7524 	int i, cnt, delta = 0;
   7525 
   7526 	for (i = 0; i < insn_cnt; i++, insn++) {
   7527 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
   7528 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
   7529 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
   7530 		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
   7531 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
   7532 			struct bpf_insn mask_and_div[] = {
   7533 				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
   7534 				/* Rx div 0 -> 0 */
   7535 				BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
   7536 				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
   7537 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
   7538 				*insn,
   7539 			};
   7540 			struct bpf_insn mask_and_mod[] = {
   7541 				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
   7542 				/* Rx mod 0 -> Rx */
   7543 				BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
   7544 				*insn,
   7545 			};
   7546 			struct bpf_insn *patchlet;
   7547 
   7548 			if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
   7549 			    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
   7550 				patchlet = mask_and_div + (is64 ? 1 : 0);
   7551 				cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
   7552 			} else {
   7553 				patchlet = mask_and_mod + (is64 ? 1 : 0);
   7554 				cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
   7555 			}
   7556 
   7557 			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
   7558 			if (!new_prog)
   7559 				return -ENOMEM;
   7560 
   7561 			delta    += cnt - 1;
   7562 			env->prog = prog = new_prog;
   7563 			insn      = new_prog->insnsi + i + delta;
   7564 			continue;
   7565 		}
   7566 
   7567 		if (BPF_CLASS(insn->code) == BPF_LD &&
   7568 		    (BPF_MODE(insn->code) == BPF_ABS ||
   7569 		     BPF_MODE(insn->code) == BPF_IND)) {
   7570 			cnt = env->ops->gen_ld_abs(insn, insn_buf);
   7571 			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
   7572 				verbose(env, "bpf verifier is misconfigured\n");
   7573 				return -EINVAL;
   7574 			}
   7575 
   7576 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
   7577 			if (!new_prog)
   7578 				return -ENOMEM;
   7579 
   7580 			delta    += cnt - 1;
   7581 			env->prog = prog = new_prog;
   7582 			insn      = new_prog->insnsi + i + delta;
   7583 			continue;
   7584 		}
   7585 
   7586 		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
   7587 		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
   7588 			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
   7589 			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
   7590 			struct bpf_insn insn_buf[16];
   7591 			struct bpf_insn *patch = &insn_buf[0];
   7592 			bool issrc, isneg;
   7593 			u32 off_reg;
   7594 
   7595 			aux = &env->insn_aux_data[i + delta];
   7596 			if (!aux->alu_state ||
   7597 			    aux->alu_state == BPF_ALU_NON_POINTER)
   7598 				continue;
   7599 
   7600 			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
   7601 			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
   7602 				BPF_ALU_SANITIZE_SRC;
   7603 
   7604 			off_reg = issrc ? insn->src_reg : insn->dst_reg;
   7605 			if (isneg)
   7606 				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
   7607 			*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
   7608 			*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
   7609 			*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
   7610 			*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
   7611 			*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
   7612 			if (issrc) {
   7613 				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
   7614 							 off_reg);
   7615 				insn->src_reg = BPF_REG_AX;
   7616 			} else {
   7617 				*patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
   7618 							 BPF_REG_AX);
   7619 			}
   7620 			if (isneg)
   7621 				insn->code = insn->code == code_add ?
   7622 					     code_sub : code_add;
   7623 			*patch++ = *insn;
   7624 			if (issrc && isneg)
   7625 				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
   7626 			cnt = patch - insn_buf;
   7627 
   7628 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
   7629 			if (!new_prog)
   7630 				return -ENOMEM;
   7631 
   7632 			delta    += cnt - 1;
   7633 			env->prog = prog = new_prog;
   7634 			insn      = new_prog->insnsi + i + delta;
   7635 			continue;
   7636 		}
   7637 
   7638 		if (insn->code != (BPF_JMP | BPF_CALL))
   7639 			continue;
   7640 		if (insn->src_reg == BPF_PSEUDO_CALL)
   7641 			continue;
   7642 
   7643 		if (insn->imm == BPF_FUNC_get_route_realm)
   7644 			prog->dst_needed = 1;
   7645 		if (insn->imm == BPF_FUNC_get_prandom_u32)
   7646 			bpf_user_rnd_init_once();
   7647 		if (insn->imm == BPF_FUNC_override_return)
   7648 			prog->kprobe_override = 1;
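        		/* bpf_tail_call() is not dispatched like an ordinary helper: it
        		 * is rewritten below into the internal BPF_TAIL_CALL opcode, and
        		 * for unprivileged programs the index is additionally bounds
        		 * checked and masked against the program array.
        		 */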
   7649 		if (insn->imm == BPF_FUNC_tail_call) {
   7650 			/* If we tail call into other programs, we
   7651 			 * cannot make any assumptions since they can
   7652 			 * be replaced dynamically during runtime in
   7653 			 * the program array.
   7654 			 */
   7655 			prog->cb_access = 1;
   7656 			env->prog->aux->stack_depth = MAX_BPF_STACK;
   7657 			env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
   7658 
   7659 			/* mark bpf_tail_call as different opcode to avoid
   7660 			 * conditional branch in the interpreter for every normal
   7661 			 * call and to prevent accidental JITing by JIT compiler
   7662 			 * that doesn't support bpf_tail_call yet
   7663 			 */
   7664 			insn->imm = 0;
   7665 			insn->code = BPF_JMP | BPF_TAIL_CALL;
   7666 
   7667 			aux = &env->insn_aux_data[i + delta];
   7668 			if (!bpf_map_ptr_unpriv(aux))
   7669 				continue;
   7670 
   7671 			/* instead of changing every JIT dealing with tail_call
   7672 			 * emit two extra insns:
   7673 			 * if (index >= max_entries) goto out;
   7674 			 * index &= array->index_mask;
   7675 			 * to avoid out-of-bounds cpu speculation
   7676 			 */
   7677 			if (bpf_map_ptr_poisoned(aux)) {
   7678 				verbose(env, "tail_call abusing map_ptr\n");
   7679 				return -EINVAL;
   7680 			}
   7681 
   7682 			map_ptr = BPF_MAP_PTR(aux->map_state);
   7683 			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
   7684 						  map_ptr->max_entries, 2);
   7685 			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
   7686 						    container_of(map_ptr,
   7687 								 struct bpf_array,
   7688 								 map)->index_mask);
   7689 			insn_buf[2] = *insn;
   7690 			cnt = 3;
   7691 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
   7692 			if (!new_prog)
   7693 				return -ENOMEM;
   7694 
   7695 			delta    += cnt - 1;
   7696 			env->prog = prog = new_prog;
   7697 			insn      = new_prog->insnsi + i + delta;
   7698 			continue;
   7699 		}
   7700 
   7701 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
   7702 		 * and other inlining handlers are currently limited to 64 bit
   7703 		 * only.
   7704 		 */
   7705 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
   7706 		    (insn->imm == BPF_FUNC_map_lookup_elem ||
   7707 		     insn->imm == BPF_FUNC_map_update_elem ||
   7708 		     insn->imm == BPF_FUNC_map_delete_elem ||
   7709 		     insn->imm == BPF_FUNC_map_push_elem   ||
   7710 		     insn->imm == BPF_FUNC_map_pop_elem    ||
   7711 		     insn->imm == BPF_FUNC_map_peek_elem)) {
   7712 			aux = &env->insn_aux_data[i + delta];
   7713 			if (bpf_map_ptr_poisoned(aux))
   7714 				goto patch_call_imm;
   7715 
   7716 			map_ptr = BPF_MAP_PTR(aux->map_state);
   7717 			ops = map_ptr->ops;
   7718 			if (insn->imm == BPF_FUNC_map_lookup_elem &&
   7719 			    ops->map_gen_lookup) {
   7720 				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
   7721 				if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
   7722 					verbose(env, "bpf verifier is misconfigured\n");
   7723 					return -EINVAL;
   7724 				}
   7725 
   7726 				new_prog = bpf_patch_insn_data(env, i + delta,
   7727 							       insn_buf, cnt);
   7728 				if (!new_prog)
   7729 					return -ENOMEM;
   7730 
   7731 				delta    += cnt - 1;
   7732 				env->prog = prog = new_prog;
   7733 				insn      = new_prog->insnsi + i + delta;
   7734 				continue;
   7735 			}
   7736 
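        			/* The direct-call rewrite below relies on the map ops
        			 * having exactly the signatures the helpers expect; pin
        			 * those signatures at compile time.
        			 */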
   7737 			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
   7738 				     (void *(*)(struct bpf_map *map, void *key))NULL));
   7739 			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
   7740 				     (int (*)(struct bpf_map *map, void *key))NULL));
   7741 			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
   7742 				     (int (*)(struct bpf_map *map, void *key, void *value,
   7743 					      u64 flags))NULL));
   7744 			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
   7745 				     (int (*)(struct bpf_map *map, void *value,
   7746 					      u64 flags))NULL));
   7747 			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
   7748 				     (int (*)(struct bpf_map *map, void *value))NULL));
   7749 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
   7750 				     (int (*)(struct bpf_map *map, void *value))NULL));
   7751 
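        			/* Patch the generic helper call into a direct call to this
        			 * map's ops function, bypassing the run-time dispatch that
        			 * the generic helper would otherwise perform.
        			 */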
   7752 			switch (insn->imm) {
   7753 			case BPF_FUNC_map_lookup_elem:
   7754 				insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
   7755 					    __bpf_call_base;
   7756 				continue;
   7757 			case BPF_FUNC_map_update_elem:
   7758 				insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
   7759 					    __bpf_call_base;
   7760 				continue;
   7761 			case BPF_FUNC_map_delete_elem:
   7762 				insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
   7763 					    __bpf_call_base;
   7764 				continue;
   7765 			case BPF_FUNC_map_push_elem:
   7766 				insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
   7767 					    __bpf_call_base;
   7768 				continue;
   7769 			case BPF_FUNC_map_pop_elem:
   7770 				insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
   7771 					    __bpf_call_base;
   7772 				continue;
   7773 			case BPF_FUNC_map_peek_elem:
   7774 				insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
   7775 					    __bpf_call_base;
   7776 				continue;
   7777 			}
   7778 
   7779 			goto patch_call_imm;
   7780 		}
   7781 
   7782 patch_call_imm:
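        		/* For every remaining helper call, resolve the helper id in
        		 * insn->imm into an offset relative to __bpf_call_base; this
        		 * is the form the interpreter and the JITs expect.
        		 */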
   7783 		fn = env->ops->get_func_proto(insn->imm, env->prog);
   7784 		/* all functions that have a prototype and that the verifier
   7785 		 * allowed programs to call must be real in-kernel functions
   7786 		 */
   7787 		if (!fn->func) {
   7788 			verbose(env,
   7789 				"kernel subsystem misconfigured func %s#%d\n",
   7790 				func_id_name(insn->imm), insn->imm);
   7791 			return -EFAULT;
   7792 		}
   7793 		insn->imm = fn->func - __bpf_call_base;
   7794 	}
   7795 
   7796 	return 0;
   7797 }
   7798 
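        /* Free the per-insn lists of explored states built up during verification.
         * STATE_LIST_MARK is a sentinel terminating each list (it also marks
         * pruning points that never collected a state) and is not a real
         * allocation, so the walk stops there.
         */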
   7799 static void free_states(struct bpf_verifier_env *env)
   7800 {
   7801 	struct bpf_verifier_state_list *sl, *sln;
   7802 	int i;
   7803 
   7804 	if (!env->explored_states)
   7805 		return;
   7806 
   7807 	for (i = 0; i < env->prog->len; i++) {
   7808 		sl = env->explored_states[i];
   7809 
   7810 		if (sl)
   7811 			while (sl != STATE_LIST_MARK) {
   7812 				sln = sl->next;
   7813 				free_verifier_state(&sl->state, false);
   7814 				kfree(sl);
   7815 				sl = sln;
   7816 			}
   7817 	}
   7818 
   7819 	kfree(env->explored_states);
   7820 }
   7821 
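        /* Main entry point of the verifier, called on BPF_PROG_LOAD: set up the
         * verifier environment, run the analysis (check_cfg() and do_check()),
         * and on success apply the post-verification rewrite passes (dead code
         * handling, ctx access conversion, helper call fixups) before handing
         * the program back to the caller.
         */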
   7822 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
   7823 	      union bpf_attr __user *uattr)
   7824 {
   7825 	struct bpf_verifier_env *env;
   7826 	struct bpf_verifier_log *log;
   7827 	int i, len, ret = -EINVAL;
   7828 	bool is_priv;
   7829 
   7830 	/* no program types registered means no program can be valid */
   7831 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
   7832 		return -EINVAL;
   7833 
   7834 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
   7835 	 * allocate/free it every time bpf_check() is called
   7836 	 */
   7837 	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
   7838 	if (!env)
   7839 		return -ENOMEM;
   7840 	log = &env->log;
   7841 
   7842 	len = (*prog)->len;
   7843 	env->insn_aux_data =
   7844 		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
   7845 	ret = -ENOMEM;
   7846 	if (!env->insn_aux_data)
   7847 		goto err_free_env;
   7848 	for (i = 0; i < len; i++)
   7849 		env->insn_aux_data[i].orig_idx = i;
   7850 	env->prog = *prog;
   7851 	env->ops = bpf_verifier_ops[env->prog->type];
   7852 
   7853 	/* grab the mutex to protect a few globals used by the verifier */
   7854 	mutex_lock(&bpf_verifier_lock);
   7855 
   7856 	if (attr->log_level || attr->log_buf || attr->log_size) {
   7857 		/* user requested verbose verifier output
   7858 		 * and supplied buffer to store the verification trace
   7859 		 */
   7860 		log->level = attr->log_level;
   7861 		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
   7862 		log->len_total = attr->log_size;
   7863 
   7864 		ret = -EINVAL;
   7865 		/* log attributes have to be sane */
   7866 		if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
   7867 		    !log->level || !log->ubuf)
   7868 			goto err_unlock;
   7869 	}
   7870 
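        	/* BPF_F_STRICT_ALIGNMENT opts in to strict alignment checking,
        	 * architectures without efficient unaligned access force it on,
        	 * and BPF_F_ANY_ALIGNMENT switches it back off again.
        	 */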
   7871 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
   7872 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
   7873 		env->strict_alignment = true;
   7874 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
   7875 		env->strict_alignment = false;
   7876 
   7877 	is_priv = capable(CAP_SYS_ADMIN);
   7878 	env->allow_ptr_leaks = is_priv;
   7879 
   7880 	ret = replace_map_fd_with_map_ptr(env);
   7881 	if (ret < 0)
   7882 		goto skip_full_check;
   7883 
   7884 	if (bpf_prog_is_dev_bound(env->prog->aux)) {
   7885 		ret = bpf_prog_offload_verifier_prep(env->prog);
   7886 		if (ret)
   7887 			goto skip_full_check;
   7888 	}
   7889 
   7890 	env->explored_states = kcalloc(env->prog->len,
   7891 				       sizeof(struct bpf_verifier_state_list *),
   7892 				       GFP_USER);
   7893 	ret = -ENOMEM;
   7894 	if (!env->explored_states)
   7895 		goto skip_full_check;
   7896 
   7897 	ret = check_subprogs(env);
   7898 	if (ret < 0)
   7899 		goto skip_full_check;
   7900 
   7901 	ret = check_btf_info(env, attr, uattr);
   7902 	if (ret < 0)
   7903 		goto skip_full_check;
   7904 
   7905 	ret = check_cfg(env);
   7906 	if (ret < 0)
   7907 		goto skip_full_check;
   7908 
   7909 	ret = do_check(env);
   7910 	if (env->cur_state) {
   7911 		free_verifier_state(env->cur_state, true);
   7912 		env->cur_state = NULL;
   7913 	}
   7914 
   7915 	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
   7916 		ret = bpf_prog_offload_finalize(env);
   7917 
   7918 skip_full_check:
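        	/* Drop any branch states still queued on the verification stack
        	 * and all explored states before running the rewrite passes below.
        	 */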
   7919 	while (!pop_stack(env, NULL, NULL));
   7920 	free_states(env);
   7921 
   7922 	if (ret == 0)
   7923 		ret = check_max_stack_depth(env);
   7924 
   7925 	/* instruction rewrites happen after this point */
   7926 	if (is_priv) {
   7927 		if (ret == 0)
   7928 			opt_hard_wire_dead_code_branches(env);
   7929 		if (ret == 0)
   7930 			ret = opt_remove_dead_code(env);
   7931 		if (ret == 0)
   7932 			ret = opt_remove_nops(env);
   7933 	} else {
   7934 		if (ret == 0)
   7935 			sanitize_dead_code(env);
   7936 	}
   7937 
   7938 	if (ret == 0)
   7939 		/* program is valid, convert *(u32*)(ctx + off) accesses */
   7940 		ret = convert_ctx_accesses(env);
   7941 
   7942 	if (ret == 0)
   7943 		ret = fixup_bpf_calls(env);
   7944 
   7945 	if (ret == 0)
   7946 		ret = fixup_call_args(env);
   7947 
   7948 	if (log->level && bpf_verifier_log_full(log))
   7949 		ret = -ENOSPC;
   7950 	if (log->level && !log->ubuf) {
   7951 		ret = -EFAULT;
   7952 		goto err_release_maps;
   7953 	}
   7954 
   7955 	if (ret == 0 && env->used_map_cnt) {
   7956 		/* if program passed verifier, update used_maps in bpf_prog_info */
   7957 		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
   7958 							  sizeof(env->used_maps[0]),
   7959 							  GFP_KERNEL);
   7960 
   7961 		if (!env->prog->aux->used_maps) {
   7962 			ret = -ENOMEM;
   7963 			goto err_release_maps;
   7964 		}
   7965 
   7966 		memcpy(env->prog->aux->used_maps, env->used_maps,
   7967 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
   7968 		env->prog->aux->used_map_cnt = env->used_map_cnt;
   7969 
   7970 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
   7971 		 * bpf_ld_imm64 instructions
   7972 		 */
   7973 		convert_pseudo_ld_imm64(env);
   7974 	}
   7975 
   7976 	if (ret == 0)
   7977 		adjust_btf_func(env);
   7978 
   7979 err_release_maps:
   7980 	if (!env->prog->aux->used_maps)
   7981 		/* if we didn't copy map pointers into bpf_prog_info, release
   7982 		 * them now. Otherwise free_used_maps() will release them.
   7983 		 */
   7984 		release_maps(env);
   7985 	*prog = env->prog;
   7986 err_unlock:
   7987 	mutex_unlock(&bpf_verifier_lock);
   7988 	vfree(env->insn_aux_data);
   7989 err_free_env:
   7990 	kfree(env);
   7991 	return ret;
   7992 }