whiterose

linux unikernel
git clone https://git.ne02ptzero.me/git/whiterose

queue_stack_maps.c (6983B)


// SPDX-License-Identifier: GPL-2.0
/*
 * queue_stack_maps.c: BPF queue and stack maps
 *
 * Copyright (c) 2018 Politecnico di Torino
 */
#include <linux/bpf.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include "percpu_freelist.h"

#define QUEUE_STACK_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)


struct bpf_queue_stack {
	struct bpf_map map;
	raw_spinlock_t lock;
	u32 head, tail;
	u32 size; /* max_entries + 1 */

	char elements[0] __aligned(8);
};

static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
{
	return container_of(map, struct bpf_queue_stack, map);
}

static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
{
	return qs->head == qs->tail;
}

static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
{
	u32 head = qs->head + 1;

	if (unlikely(head >= qs->size))
		head = 0;

	return head == qs->tail;
}
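
/*
 * Layout note: the backing array has size = max_entries + 1 slots and one
 * slot is always left unused, so head == tail can only mean "empty" and
 * head + 1 wrapping onto tail can only mean "full".  Worked example with
 * max_entries = 4 (size = 5): starting from head = tail = 0, four pushes
 * leave head = 4 and tail = 0; a fifth push would wrap head back to 0 and
 * collide with tail, which is exactly what queue_stack_map_is_full()
 * detects.
 */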

/* Called from syscall */
static int queue_stack_map_alloc_check(union bpf_attr *attr)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 0 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}

static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
	int ret, numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_queue_stack *qs;
	u64 size, queue_size, cost;

	size = (u64) attr->max_entries + 1;
	cost = queue_size = sizeof(*qs) + size * attr->value_size;
	if (cost >= U32_MAX - PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

	ret = bpf_map_precharge_memlock(cost);
	if (ret < 0)
		return ERR_PTR(ret);

	qs = bpf_map_area_alloc(queue_size, numa_node);
	if (!qs)
		return ERR_PTR(-ENOMEM);

	memset(qs, 0, sizeof(*qs));

	bpf_map_init_from_attr(&qs->map, attr);

	qs->map.pages = cost;
	qs->size = size;

	raw_spin_lock_init(&qs->lock);

	return &qs->map;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void queue_stack_map_free(struct bpf_map *map)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding critical sections in
	 * these programs to complete
	 */
	synchronize_rcu();

	bpf_map_area_free(qs);
}

static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;

	raw_spin_lock_irqsave(&qs->lock, flags);

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	ptr = &qs->elements[qs->tail * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete) {
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}


static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;
	u32 index;

	raw_spin_lock_irqsave(&qs->lock, flags);

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	index = qs->head - 1;
	if (unlikely(index >= qs->size))
		index = qs->size - 1;

	ptr = &qs->elements[index * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete)
		qs->head = index;

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}

/* Called from syscall or from eBPF program */
static int queue_map_peek_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static int stack_map_peek_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static int queue_map_pop_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, true);
}

/* Called from syscall or from eBPF program */
static int stack_map_pop_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, true);
}

/* Called from syscall or from eBPF program */
static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
				     u64 flags)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long irq_flags;
	int err = 0;
	void *dst;

	/* BPF_EXIST is used to force making room for a new element in case the
	 * map is full
	 */
	bool replace = (flags & BPF_EXIST);

	/* Check supported flags for queue and stack maps */
	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
		return -EINVAL;

	raw_spin_lock_irqsave(&qs->lock, irq_flags);

	if (queue_stack_map_is_full(qs)) {
		if (!replace) {
			err = -E2BIG;
			goto out;
		}
		/* advance tail pointer to overwrite oldest element */
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

	dst = &qs->elements[qs->head * qs->map.value_size];
	memcpy(dst, value, qs->map.value_size);

	if (unlikely(++qs->head >= qs->size))
		qs->head = 0;

out:
	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
	return err;
}
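
/*
 * Sketch of how the push path above is reached: BPF programs call the
 * bpf_map_push_elem() helper, and userspace goes through BPF_MAP_UPDATE_ELEM
 * with a NULL key; both forward the flags argument here, e.g. from program
 * context:
 *
 *	__u32 v = 42;
 *	bpf_map_push_elem(&my_queue, &v, BPF_EXIST);
 *
 * BPF_EXIST asks queue_stack_map_push_elem() to make room by dropping the
 * oldest element when the map is full; my_queue is a hypothetical
 * BPF_MAP_TYPE_QUEUE map declared elsewhere.
 */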

/* Called from syscall or from eBPF program */
static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* Called from syscall or from eBPF program */
static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 flags)
{
	return -EINVAL;
}

/* Called from syscall or from eBPF program */
static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called from syscall */
static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
					void *next_key)
{
	return -EINVAL;
}

const struct bpf_map_ops queue_map_ops = {
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = queue_map_pop_elem,
	.map_peek_elem = queue_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
};

const struct bpf_map_ops stack_map_ops = {
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = stack_map_pop_elem,
	.map_peek_elem = stack_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
};
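
A minimal userspace sketch of how these ops are reached through the raw bpf(2)
syscall, assuming the UAPI <linux/bpf.h> that ships with this tree: a push is a
BPF_MAP_UPDATE_ELEM with a NULL key (key_size is 0 for queue/stack maps) and
lands in queue_stack_map_push_elem(); a pop is a BPF_MAP_LOOKUP_AND_DELETE_ELEM
and lands in queue_map_pop_elem(). Creating the map needs CAP_SYS_ADMIN, per
queue_stack_map_alloc_check(); error handling is trimmed for brevity.

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static long sys_bpf(int cmd, union bpf_attr *attr)
{
	return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

int main(void)
{
	union bpf_attr attr;
	__u32 val = 42;
	int fd;

	/* key_size must be 0 and value_size non-zero, see alloc_check */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_QUEUE;
	attr.key_size = 0;
	attr.value_size = sizeof(val);
	attr.max_entries = 16;
	fd = sys_bpf(BPF_MAP_CREATE, &attr);
	if (fd < 0)
		return 1;

	/* push: update with a NULL key; flags may be 0 or BPF_EXIST */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = 0;
	attr.value = (__u64)(unsigned long)&val;
	attr.flags = 0;
	sys_bpf(BPF_MAP_UPDATE_ELEM, &attr);

	/* pop: lookup-and-delete returns the oldest element */
	val = 0;
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = 0;
	attr.value = (__u64)(unsigned long)&val;
	sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr);

	printf("popped %u\n", val);
	close(fd);
	return 0;
}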