whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 43d86ee8c639df750529b4d8f062b328b61c423e
parent 645ff1e8e704c4f33ab1fcd3c87f95cb9b6d7144
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu,  3 Jan 2019 12:53:47 -0800

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:
 "Several fixes here. Basically split down the line between newly
  introduced regressions and long existing problems:

   1) Double free in tipc_enable_bearer(), from Cong Wang.

   2) Many fixes to nf_conncount, from Florian Westphal.

   3) op->get_regs_len() can throw an error, check it, from Yunsheng
      Lin.

   4) Need to use GFP_ATOMIC in *_add_hash_mac_address() of fsl/fman
      driver, from Scott Wood.

   5) Inifnite loop in fib_empty_table(), from Yue Haibing.

   6) Use after free in ax25_fillin_cb(), from Cong Wang.

   7) Fix socket locking in nr_find_socket(), also from Cong Wang.

   8) Fix WoL wakeup enable in r8169, from Heiner Kallweit.

   9) On 32-bit sock->sk_stamp is not thread-safe, from Deepa Dinamani.

  10) Fix ptr_ring wrap during queue swap, from Cong Wang.

  11) Missing shutdown callback in hinic driver, from Xue Chaojing.

  12) Need to return NULL on error from ip6_neigh_lookup(), from Stefano
      Brivio.

  13) BPF out of bounds speculation fixes from Daniel Borkmann"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (57 commits)
  ipv6: Consider sk_bound_dev_if when binding a socket to an address
  ipv6: Fix dump of specific table with strict checking
  bpf: add various test cases to selftests
  bpf: prevent out of bounds speculation on pointer arithmetic
  bpf: fix check_map_access smin_value test when pointer contains offset
  bpf: restrict unknown scalars of mixed signed bounds for unprivileged
  bpf: restrict stack pointer arithmetic for unprivileged
  bpf: restrict map value pointer arithmetic for unprivileged
  bpf: enable access to ax register also from verifier rewrite
  bpf: move tmp variable into ax register in interpreter
  bpf: move {prev_,}insn_idx into verifier env
  isdn: fix kernel-infoleak in capi_unlocked_ioctl
  ipv6: route: Fix return value of ip6_neigh_lookup() on neigh_create() error
  net/hamradio/6pack: use mod_timer() to rearm timers
  net-next/hinic:add shutdown callback
  net: hns3: call hns3_nic_net_open() while doing HNAE3_UP_CLIENT
  ip: validate header length on virtual device xmit
  tap: call skb_probe_transport_header after setting skb->dev
  ptr_ring: wrap back ->producer in __ptr_ring_swap_queue()
  net: rds: remove unnecessary NULL check
  ...

Diffstat:
MDocumentation/networking/snmp_counter.rst | 240++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Mdrivers/isdn/capi/kcapi.c | 4++--
Mdrivers/isdn/hisax/hfc_pci.c | 2++
Mdrivers/net/dsa/bcm_sf2.c | 7+++----
Mdrivers/net/ethernet/atheros/atl1e/atl1e_main.c | 4+++-
Mdrivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c | 4++++
Mdrivers/net/ethernet/freescale/fman/fman_memac.c | 2+-
Mdrivers/net/ethernet/freescale/fman/fman_tgec.c | 2+-
Mdrivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 7++++---
Mdrivers/net/ethernet/huawei/hinic/hinic_main.c | 6++++++
Mdrivers/net/ethernet/ibm/ibmveth.c | 6+++++-
Mdrivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2++
Mdrivers/net/ethernet/realtek/r8169.c | 4++--
Mdrivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 4+++-
Mdrivers/net/ethernet/sun/niu.c | 10++++++++--
Mdrivers/net/ethernet/ti/cpts.c | 4+++-
Mdrivers/net/hamradio/6pack.c | 16++++------------
Mdrivers/net/tap.c | 3+--
Mdrivers/net/wan/fsl_ucc_hdlc.c | 1-
Mdrivers/net/wan/x25_asy.c | 2++
Minclude/linux/bpf_verifier.h | 12++++++++++++
Minclude/linux/filter.h | 10+++-------
Minclude/linux/phy.h | 13++++++++-----
Minclude/linux/phy/phy.h | 2+-
Minclude/linux/ptr_ring.h | 2++
Minclude/net/ip_tunnels.h | 20++++++++++++++++++++
Minclude/net/netfilter/nf_conntrack_count.h | 19+++----------------
Minclude/net/sock.h | 38+++++++++++++++++++++++++++++++++++---
Mkernel/bpf/core.c | 54+++++++++++++++++++++++++++++++++++++-----------------
Mkernel/bpf/verifier.c | 336++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
Mnet/ax25/af_ax25.c | 11+++++++++--
Mnet/ax25/ax25_dev.c | 2++
Mnet/compat.c | 15+++++++++------
Mnet/core/ethtool.c | 12++++++++++--
Mnet/core/rtnetlink.c | 5+++++
Mnet/core/sock.c | 15++++++++++-----
Mnet/ipv4/fib_rules.c | 8++++++--
Mnet/ipv4/ip_gre.c | 9+++++++++
Mnet/ipv4/ip_tunnel.c | 9---------
Mnet/ipv4/ip_vti.c | 12+++++++++---
Mnet/ipv6/addrconf.c | 4++--
Mnet/ipv6/af_inet6.c | 3+++
Mnet/ipv6/ip6_fib.c | 6+++++-
Mnet/ipv6/ip6_gre.c | 10+++++++---
Mnet/ipv6/ip6_tunnel.c | 10+++-------
Mnet/ipv6/ip6_vti.c | 8++++----
Mnet/ipv6/ip6mr.c | 17+++++++++++------
Mnet/ipv6/reassembly.c | 2+-
Mnet/ipv6/route.c | 10++++++++--
Mnet/ipv6/sit.c | 3+++
Mnet/netfilter/nf_conncount.c | 290+++++++++++++++++++++++++++++++++++--------------------------------------------
Mnet/netfilter/nf_tables_api.c | 2++
Mnet/netfilter/nft_connlimit.c | 14+++-----------
Mnet/netrom/af_netrom.c | 15++++++++++-----
Mnet/rds/tcp.c | 2+-
Mnet/sunrpc/svcsock.c | 2+-
Mnet/tipc/bearer.c | 1-
Mnet/tipc/netlink_compat.c | 2++
Mtools/testing/selftests/bpf/test_maps.c | 2+-
Mtools/testing/selftests/bpf/test_verifier.c | 1146+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
60 files changed, 2079 insertions(+), 404 deletions(-)

diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst @@ -571,7 +571,97 @@ duplicate packet is received. * TcpExtTCPDSACKOfoRecv The TCP stack receives a DSACK, which indicate an out of order -duplciate packet is received. +duplicate packet is received. + +TCP out of order +=============== +* TcpExtTCPOFOQueue +The TCP layer receives an out of order packet and has enough memory +to queue it. + +* TcpExtTCPOFODrop +The TCP layer receives an out of order packet but doesn't have enough +memory, so drops it. Such packets won't be counted into +TcpExtTCPOFOQueue. + +* TcpExtTCPOFOMerge +The received out of order packet has an overlay with the previous +packet. the overlay part will be dropped. All of TcpExtTCPOFOMerge +packets will also be counted into TcpExtTCPOFOQueue. + +TCP PAWS +======= +PAWS (Protection Against Wrapped Sequence numbers) is an algorithm +which is used to drop old packets. It depends on the TCP +timestamps. For detail information, please refer the `timestamp wiki`_ +and the `RFC of PAWS`_. + +.. _RFC of PAWS: https://tools.ietf.org/html/rfc1323#page-17 +.. _timestamp wiki: https://en.wikipedia.org/wiki/Transmission_Control_Protocol#TCP_timestamps + +* TcpExtPAWSActive +Packets are dropped by PAWS in Syn-Sent status. + +* TcpExtPAWSEstab +Packets are dropped by PAWS in any status other than Syn-Sent. + +TCP ACK skip +=========== +In some scenarios, kernel would avoid sending duplicate ACKs too +frequently. Please find more details in the tcp_invalid_ratelimit +section of the `sysctl document`_. When kernel decides to skip an ACK +due to tcp_invalid_ratelimit, kernel would update one of below +counters to indicate the ACK is skipped in which scenario. The ACK +would only be skipped if the received packet is either a SYN packet or +it has no data. + +.. _sysctl document: https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt + +* TcpExtTCPACKSkippedSynRecv +The ACK is skipped in Syn-Recv status. The Syn-Recv status means the +TCP stack receives a SYN and replies SYN+ACK. Now the TCP stack is +waiting for an ACK. Generally, the TCP stack doesn't need to send ACK +in the Syn-Recv status. But in several scenarios, the TCP stack need +to send an ACK. E.g., the TCP stack receives the same SYN packet +repeately, the received packet does not pass the PAWS check, or the +received packet sequence number is out of window. In these scenarios, +the TCP stack needs to send ACK. If the ACk sending frequency is higher than +tcp_invalid_ratelimit allows, the TCP stack will skip sending ACK and +increase TcpExtTCPACKSkippedSynRecv. + + +* TcpExtTCPACKSkippedPAWS +The ACK is skipped due to PAWS (Protect Against Wrapped Sequence +numbers) check fails. If the PAWS check fails in Syn-Recv, Fin-Wait-2 +or Time-Wait statuses, the skipped ACK would be counted to +TcpExtTCPACKSkippedSynRecv, TcpExtTCPACKSkippedFinWait2 or +TcpExtTCPACKSkippedTimeWait. In all other statuses, the skipped ACK +would be counted to TcpExtTCPACKSkippedPAWS. + +* TcpExtTCPACKSkippedSeq +The sequence number is out of window and the timestamp passes the PAWS +check and the TCP status is not Syn-Recv, Fin-Wait-2, and Time-Wait. + +* TcpExtTCPACKSkippedFinWait2 +The ACK is skipped in Fin-Wait-2 status, the reason would be either +PAWS check fails or the received sequence number is out of window. + +* TcpExtTCPACKSkippedTimeWait +Tha ACK is skipped in Time-Wait status, the reason would be either +PAWS check failed or the received sequence number is out of window. + +* TcpExtTCPACKSkippedChallenge +The ACK is skipped if the ACK is a challenge ACK. The RFC 5961 defines +3 kind of challenge ACK, please refer `RFC 5961 section 3.2`_, +`RFC 5961 section 4.2`_ and `RFC 5961 section 5.2`_. Besides these +three scenarios, In some TCP status, the linux TCP stack would also +send challenge ACKs if the ACK number is before the first +unacknowledged number (more strict than `RFC 5961 section 5.2`_). + +.. _RFC 5961 section 3.2: https://tools.ietf.org/html/rfc5961#page-7 +.. _RFC 5961 section 4.2: https://tools.ietf.org/html/rfc5961#page-9 +.. _RFC 5961 section 5.2: https://tools.ietf.org/html/rfc5961#page-11 + examples ======= @@ -1188,3 +1278,151 @@ Run nstat on server B:: We have deleted the default route on server B. Server B couldn't find a route for the 8.8.8.8 IP address, so server B increased IpOutNoRoutes. + +TcpExtTCPACKSkippedSynRecv +------------------------ +In this test, we send 3 same SYN packets from client to server. The +first SYN will let server create a socket, set it to Syn-Recv status, +and reply a SYN/ACK. The second SYN will let server reply the SYN/ACK +again, and record the reply time (the duplicate ACK reply time). The +third SYN will let server check the previous duplicate ACK reply time, +and decide to skip the duplicate ACK, then increase the +TcpExtTCPACKSkippedSynRecv counter. + +Run tcpdump to capture a SYN packet:: + + nstatuser@nstat-a:~$ sudo tcpdump -c 1 -w /tmp/syn.pcap port 9000 + tcpdump: listening on ens3, link-type EN10MB (Ethernet), capture size 262144 bytes + +Open another terminal, run nc command:: + + nstatuser@nstat-a:~$ nc nstat-b 9000 + +As the nstat-b didn't listen on port 9000, it should reply a RST, and +the nc command exited immediately. It was enough for the tcpdump +command to capture a SYN packet. A linux server might use hardware +offload for the TCP checksum, so the checksum in the /tmp/syn.pcap +might be not correct. We call tcprewrite to fix it:: + + nstatuser@nstat-a:~$ tcprewrite --infile=/tmp/syn.pcap --outfile=/tmp/syn_fixcsum.pcap --fixcsum + +On nstat-b, we run nc to listen on port 9000:: + + nstatuser@nstat-b:~$ nc -lkv 9000 + Listening on [0.0.0.0] (family 0, port 9000) + +On nstat-a, we blocked the packet from port 9000, or nstat-a would send +RST to nstat-b:: + + nstatuser@nstat-a:~$ sudo iptables -A INPUT -p tcp --sport 9000 -j DROP + +Send 3 SYN repeatly to nstat-b:: + + nstatuser@nstat-a:~$ for i in {1..3}; do sudo tcpreplay -i ens3 /tmp/syn_fixcsum.pcap; done + +Check snmp cunter on nstat-b:: + + nstatuser@nstat-b:~$ nstat | grep -i skip + TcpExtTCPACKSkippedSynRecv 1 0.0 + +As we expected, TcpExtTCPACKSkippedSynRecv is 1. + +TcpExtTCPACKSkippedPAWS +---------------------- +To trigger PAWS, we could send an old SYN. + +On nstat-b, let nc listen on port 9000:: + + nstatuser@nstat-b:~$ nc -lkv 9000 + Listening on [0.0.0.0] (family 0, port 9000) + +On nstat-a, run tcpdump to capture a SYN:: + + nstatuser@nstat-a:~$ sudo tcpdump -w /tmp/paws_pre.pcap -c 1 port 9000 + tcpdump: listening on ens3, link-type EN10MB (Ethernet), capture size 262144 bytes + +On nstat-a, run nc as a client to connect nstat-b:: + + nstatuser@nstat-a:~$ nc -v nstat-b 9000 + Connection to nstat-b 9000 port [tcp/*] succeeded! + +Now the tcpdump has captured the SYN and exit. We should fix the +checksum:: + + nstatuser@nstat-a:~$ tcprewrite --infile /tmp/paws_pre.pcap --outfile /tmp/paws.pcap --fixcsum + +Send the SYN packet twice:: + + nstatuser@nstat-a:~$ for i in {1..2}; do sudo tcpreplay -i ens3 /tmp/paws.pcap; done + +On nstat-b, check the snmp counter:: + + nstatuser@nstat-b:~$ nstat | grep -i skip + TcpExtTCPACKSkippedPAWS 1 0.0 + +We sent two SYN via tcpreplay, both of them would let PAWS check +failed, the nstat-b replied an ACK for the first SYN, skipped the ACK +for the second SYN, and updated TcpExtTCPACKSkippedPAWS. + +TcpExtTCPACKSkippedSeq +-------------------- +To trigger TcpExtTCPACKSkippedSeq, we send packets which have valid +timestamp (to pass PAWS check) but the sequence number is out of +window. The linux TCP stack would avoid to skip if the packet has +data, so we need a pure ACK packet. To generate such a packet, we +could create two sockets: one on port 9000, another on port 9001. Then +we capture an ACK on port 9001, change the source/destination port +numbers to match the port 9000 socket. Then we could trigger +TcpExtTCPACKSkippedSeq via this packet. + +On nstat-b, open two terminals, run two nc commands to listen on both +port 9000 and port 9001:: + + nstatuser@nstat-b:~$ nc -lkv 9000 + Listening on [0.0.0.0] (family 0, port 9000) + + nstatuser@nstat-b:~$ nc -lkv 9001 + Listening on [0.0.0.0] (family 0, port 9001) + +On nstat-a, run two nc clients:: + + nstatuser@nstat-a:~$ nc -v nstat-b 9000 + Connection to nstat-b 9000 port [tcp/*] succeeded! + + nstatuser@nstat-a:~$ nc -v nstat-b 9001 + Connection to nstat-b 9001 port [tcp/*] succeeded! + +On nstat-a, run tcpdump to capture an ACK:: + + nstatuser@nstat-a:~$ sudo tcpdump -w /tmp/seq_pre.pcap -c 1 dst port 9001 + tcpdump: listening on ens3, link-type EN10MB (Ethernet), capture size 262144 bytes + +On nstat-b, send a packet via the port 9001 socket. E.g. we sent a +string 'foo' in our example:: + + nstatuser@nstat-b:~$ nc -lkv 9001 + Listening on [0.0.0.0] (family 0, port 9001) + Connection from nstat-a 42132 received! + foo + +On nstat-a, the tcpdump should have caputred the ACK. We should check +the source port numbers of the two nc clients:: + + nstatuser@nstat-a:~$ ss -ta '( dport = :9000 || dport = :9001 )' | tee + State Recv-Q Send-Q Local Address:Port Peer Address:Port + ESTAB 0 0 192.168.122.250:50208 192.168.122.251:9000 + ESTAB 0 0 192.168.122.250:42132 192.168.122.251:9001 + +Run tcprewrite, change port 9001 to port 9000, chagne port 42132 to +port 50208:: + + nstatuser@nstat-a:~$ tcprewrite --infile /tmp/seq_pre.pcap --outfile /tmp/seq.pcap -r 9001:9000 -r 42132:50208 --fixcsum + +Now the /tmp/seq.pcap is the packet we need. Send it to nstat-b:: + + nstatuser@nstat-a:~$ for i in {1..2}; do sudo tcpreplay -i ens3 /tmp/seq.pcap; done + +Check TcpExtTCPACKSkippedSeq on nstat-b:: + + nstatuser@nstat-b:~$ nstat | grep -i skip + TcpExtTCPACKSkippedSeq 1 0.0 diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c @@ -852,7 +852,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf) u16 ret; if (contr == 0) { - strlcpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN); + strncpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN); return CAPI_NOERROR; } @@ -860,7 +860,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf) ctr = get_capi_ctr_by_nr(contr); if (ctr && ctr->state == CAPI_CTR_RUNNING) { - strlcpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN); + strncpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN); ret = CAPI_NOERROR; } else ret = CAPI_REGNOTINSTALLED; diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c @@ -1169,11 +1169,13 @@ HFCPCI_l1hw(struct PStack *st, int pr, void *arg) if (cs->debug & L1_DEB_LAPD) debugl1(cs, "-> PH_REQUEST_PULL"); #endif + spin_lock_irqsave(&cs->lock, flags); if (!cs->tx_skb) { test_and_clear_bit(FLG_L1_PULL_REQ, &st->l1.Flags); st->l1.l1l2(st, PH_PULL | CONFIRM, NULL); } else test_and_set_bit(FLG_L1_PULL_REQ, &st->l1.Flags); + spin_unlock_irqrestore(&cs->lock, flags); break; case (HW_RESET | REQUEST): spin_lock_irqsave(&cs->lock, flags); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c @@ -303,11 +303,10 @@ static int bcm_sf2_sw_mdio_write(struct mii_bus *bus, int addr, int regnum, * send them to our master MDIO bus controller */ if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr)) - bcm_sf2_sw_indir_rw(priv, 0, addr, regnum, val); + return bcm_sf2_sw_indir_rw(priv, 0, addr, regnum, val); else - mdiobus_write_nested(priv->master_mii_bus, addr, regnum, val); - - return 0; + return mdiobus_write_nested(priv->master_mii_bus, addr, + regnum, val); } static irqreturn_t bcm_sf2_switch_0_isr(int irq, void *dev_id) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -473,7 +473,9 @@ static void atl1e_mdio_write(struct net_device *netdev, int phy_id, { struct atl1e_adapter *adapter = netdev_priv(netdev); - atl1e_write_phy_reg(&adapter->hw, reg_num & MDIO_REG_ADDR_MASK, val); + if (atl1e_write_phy_reg(&adapter->hw, + reg_num & MDIO_REG_ADDR_MASK, val)) + netdev_err(netdev, "write phy register failed\n"); } static int atl1e_mii_ioctl(struct net_device *netdev, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -1229,6 +1229,10 @@ int cudbg_collect_hw_sched(struct cudbg_init *pdbg_init, rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(struct cudbg_hw_sched), &temp_buff); + + if (rc) + return rc; + hw_sched_buff = (struct cudbg_hw_sched *)temp_buff.data; hw_sched_buff->map = t4_read_reg(padap, TP_TX_MOD_QUEUE_REQ_MAP_A); hw_sched_buff->mode = TIMERMODE_G(t4_read_reg(padap, TP_MOD_CONFIG_A)); diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -928,7 +928,7 @@ int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr) hash = get_mac_addr_hash_code(addr) & HASH_CTRL_ADDR_MASK; /* Create element to be added to the driver hash table */ - hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL); + hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC); if (!hash_entry) return -ENOMEM; hash_entry->addr = addr; diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c @@ -553,7 +553,7 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr) hash = (crc >> TGEC_HASH_MCAST_SHIFT) & TGEC_HASH_ADR_MSK; /* Create element to be added to the driver hash table */ - hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL); + hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC); if (!hash_entry) return -ENOMEM; hash_entry->addr = addr; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3995,17 +3995,18 @@ static int hns3_reset_notify_up_enet(struct hnae3_handle *handle) struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev); int ret = 0; + clear_bit(HNS3_NIC_STATE_RESETTING, &priv->state); + if (netif_running(kinfo->netdev)) { - ret = hns3_nic_net_up(kinfo->netdev); + ret = hns3_nic_net_open(kinfo->netdev); if (ret) { + set_bit(HNS3_NIC_STATE_RESETTING, &priv->state); netdev_err(kinfo->netdev, "hns net up fail, ret=%d!\n", ret); return ret; } } - clear_bit(HNS3_NIC_STATE_RESETTING, &priv->state); - return ret; } diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -1106,6 +1106,11 @@ static void hinic_remove(struct pci_dev *pdev) dev_info(&pdev->dev, "HiNIC driver - removed\n"); } +static void hinic_shutdown(struct pci_dev *pdev) +{ + pci_disable_device(pdev); +} + static const struct pci_device_id hinic_pci_table[] = { { PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_QUAD_PORT_25GE), 0}, { PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_DUAL_PORT_25GE), 0}, @@ -1119,6 +1124,7 @@ static struct pci_driver hinic_driver = { .id_table = hinic_pci_table, .probe = hinic_probe, .remove = hinic_remove, + .shutdown = hinic_shutdown, }; module_pci_driver(hinic_driver); diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c @@ -1171,11 +1171,15 @@ out: map_failed_frags: last = i+1; - for (i = 0; i < last; i++) + for (i = 1; i < last; i++) dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, DMA_TO_DEVICE); + dma_unmap_single(&adapter->vdev->dev, + descs[0].fields.address, + descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK, + DMA_TO_DEVICE); map_failed: if (!firmware_has_feature(FW_FEATURE_CMO)) netdev_err(netdev, "tx: unable to map xmit buffer\n"); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5240,6 +5240,8 @@ static int mvpp2_probe(struct platform_device *pdev) if (has_acpi_companion(&pdev->dev)) { acpi_id = acpi_match_device(pdev->dev.driver->acpi_match_table, &pdev->dev); + if (!acpi_id) + return -EINVAL; priv->hw_version = (unsigned long)acpi_id->driver_data; } else { priv->hw_version = diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c @@ -1477,6 +1477,8 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) } RTL_W8(tp, Cfg9346, Cfg9346_Lock); + + device_set_wakeup_enable(tp_to_dev(tp), wolopts); } static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) @@ -1498,8 +1500,6 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) rtl_unlock_work(tp); - device_set_wakeup_enable(d, tp->saved_wolopts); - pm_runtime_put_noidle(d); return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -59,7 +59,9 @@ static int sun7i_gmac_init(struct platform_device *pdev, void *priv) gmac->clk_enabled = 1; } else { clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE); - clk_prepare(gmac->tx_clk); + ret = clk_prepare(gmac->tx_clk); + if (ret) + return ret; } return 0; diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c @@ -8100,6 +8100,8 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) start += 3; prop_len = niu_pci_eeprom_read(np, start + 4); + if (prop_len < 0) + return prop_len; err = niu_pci_vpd_get_propname(np, start + 5, namebuf, 64); if (err < 0) return err; @@ -8144,8 +8146,12 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) netif_printk(np, probe, KERN_DEBUG, np->dev, "VPD_SCAN: Reading in property [%s] len[%d]\n", namebuf, prop_len); - for (i = 0; i < prop_len; i++) - *prop_buf++ = niu_pci_eeprom_read(np, off + i); + for (i = 0; i < prop_len; i++) { + err = niu_pci_eeprom_read(np, off + i); + if (err >= 0) + *prop_buf = err; + ++prop_buf; + } } start += len; diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c @@ -590,7 +590,9 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs, return ERR_CAST(cpts->refclk); } - clk_prepare(cpts->refclk); + ret = clk_prepare(cpts->refclk); + if (ret) + return ERR_PTR(ret); cpts->cc.read = cpts_systim_read; cpts->cc.mask = CLOCKSOURCE_MASK(32); diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c @@ -523,10 +523,7 @@ static void resync_tnc(struct timer_list *t) /* Start resync timer again -- the TNC might be still absent */ - - del_timer(&sp->resync_t); - sp->resync_t.expires = jiffies + SIXP_RESYNC_TIMEOUT; - add_timer(&sp->resync_t); + mod_timer(&sp->resync_t, jiffies + SIXP_RESYNC_TIMEOUT); } static inline int tnc_init(struct sixpack *sp) @@ -537,9 +534,7 @@ static inline int tnc_init(struct sixpack *sp) sp->tty->ops->write(sp->tty, &inbyte, 1); - del_timer(&sp->resync_t); - sp->resync_t.expires = jiffies + SIXP_RESYNC_TIMEOUT; - add_timer(&sp->resync_t); + mod_timer(&sp->resync_t, jiffies + SIXP_RESYNC_TIMEOUT); return 0; } @@ -897,11 +892,8 @@ static void decode_prio_command(struct sixpack *sp, unsigned char cmd) /* if the state byte has been received, the TNC is present, so the resync timer can be reset. */ - if (sp->tnc_state == TNC_IN_SYNC) { - del_timer(&sp->resync_t); - sp->resync_t.expires = jiffies + SIXP_INIT_RESYNC_TIMEOUT; - add_timer(&sp->resync_t); - } + if (sp->tnc_state == TNC_IN_SYNC) + mod_timer(&sp->resync_t, jiffies + SIXP_INIT_RESYNC_TIMEOUT); sp->status1 = cmd & SIXP_PRIO_DATA_MASK; } diff --git a/drivers/net/tap.c b/drivers/net/tap.c @@ -1177,8 +1177,6 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) goto err_kfree; } - skb_probe_transport_header(skb, ETH_HLEN); - /* Move network header to the right position for VLAN tagged packets */ if ((skb->protocol == htons(ETH_P_8021Q) || skb->protocol == htons(ETH_P_8021AD)) && @@ -1189,6 +1187,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) tap = rcu_dereference(q->tap); if (tap) { skb->dev = tap->dev; + skb_probe_transport_header(skb, ETH_HLEN); dev_queue_xmit(skb); } else { kfree_skb(skb); diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c @@ -1180,7 +1180,6 @@ static int ucc_hdlc_probe(struct platform_device *pdev) if (register_hdlc_device(dev)) { ret = -ENOBUFS; pr_err("ucc_hdlc: unable to register hdlc device\n"); - free_netdev(dev); goto free_dev; } diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c @@ -485,8 +485,10 @@ static int x25_asy_open(struct net_device *dev) /* Cleanup */ kfree(sl->xbuff); + sl->xbuff = NULL; noxbuff: kfree(sl->rbuff); + sl->rbuff = NULL; norbuff: return -ENOMEM; } diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h @@ -148,6 +148,7 @@ struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; u32 curframe; + bool speculative; }; #define bpf_get_spilled_reg(slot, frame) \ @@ -167,15 +168,24 @@ struct bpf_verifier_state_list { struct bpf_verifier_state_list *next; }; +/* Possible states for alu_state member. */ +#define BPF_ALU_SANITIZE_SRC 1U +#define BPF_ALU_SANITIZE_DST 2U +#define BPF_ALU_NEG_VALUE (1U << 2) +#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ + BPF_ALU_SANITIZE_DST) + struct bpf_insn_aux_data { union { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ unsigned long map_state; /* pointer/poison value for maps */ s32 call_imm; /* saved imm field of call insn */ + u32 alu_limit; /* limit for add/sub register with pointer */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ + u8 alu_state; /* used in combination with alu_limit */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -212,6 +222,8 @@ struct bpf_subprog_info { * one verifier_env per bpf_check() call */ struct bpf_verifier_env { + u32 insn_idx; + u32 prev_insn_idx; struct bpf_prog *prog; /* eBPF program being verified */ const struct bpf_verifier_ops *ops; struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ diff --git a/include/linux/filter.h b/include/linux/filter.h @@ -53,14 +53,10 @@ struct sock_reuseport; #define BPF_REG_D BPF_REG_8 /* data, callee-saved */ #define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */ -/* Kernel hidden auxiliary/helper register for hardening step. - * Only used by eBPF JITs. It's nothing more than a temporary - * register that JITs use internally, only that here it's part - * of eBPF instructions that have been rewritten for blinding - * constants. See JIT pre-step in bpf_jit_blind_constants(). - */ +/* Kernel hidden auxiliary/helper register. */ #define BPF_REG_AX MAX_BPF_REG -#define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) +#define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) +#define MAX_BPF_JIT_REG MAX_BPF_EXT_REG /* unused opcode to mark special call to bpf_tail_call() helper */ #define BPF_TAIL_CALL 0xf0 diff --git a/include/linux/phy.h b/include/linux/phy.h @@ -1,6 +1,6 @@ /* * Framework and drivers for configuring and reading different PHYs - * Based on code in sungem_phy.c and gianfar_phy.c + * Based on code in sungem_phy.c and (long-removed) gianfar_phy.c * * Author: Andy Fleming * @@ -110,9 +110,9 @@ typedef enum { * @speeds: buffer to store supported speeds in. * @size: size of speeds buffer. * - * Description: Returns the number of supported speeds, and - * fills the speeds * buffer with the supported speeds. If speeds buffer is - * too small to contain * all currently supported speeds, will return as + * Description: Returns the number of supported speeds, and fills + * the speeds buffer with the supported speeds. If speeds buffer is + * too small to contain all currently supported speeds, will return as * many speeds as can fit. */ unsigned int phy_supported_speeds(struct phy_device *phy, @@ -120,7 +120,10 @@ unsigned int phy_supported_speeds(struct phy_device *phy, unsigned int size); /** - * It maps 'enum phy_interface_t' found in include/linux/phy.h + * phy_modes - map phy_interface_t enum to device tree binding of phy-mode + * @interface: enum phy_interface_t value + * + * Description: maps 'enum phy_interface_t' defined in this file * into the device tree binding of 'phy-mode', so that Ethernet * device driver can get phy interface from device tree. */ diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h @@ -110,6 +110,7 @@ struct phy_ops { /** * struct phy_attrs - represents phy attributes * @bus_width: Data path width implemented by PHY + * @mode: PHY mode */ struct phy_attrs { u32 bus_width; @@ -121,7 +122,6 @@ struct phy_attrs { * @dev: phy device * @id: id of the phy device * @ops: function pointers for performing phy operations - * @init_data: list of PHY consumers (non-dt only) * @mutex: mutex to protect phy_ops * @init_count: used to protect when the PHY is used by multiple consumers * @power_count: used to protect when the PHY is used by multiple consumers diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h @@ -573,6 +573,8 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, else if (destroy) destroy(ptr); + if (producer >= size) + producer = 0; __ptr_ring_set_size(r, size); r->producer = producer; r->consumer_head = 0; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h @@ -308,6 +308,26 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap); +static inline bool pskb_inet_may_pull(struct sk_buff *skb) +{ + int nhlen; + + switch (skb->protocol) { +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + nhlen = sizeof(struct ipv6hdr); + break; +#endif + case htons(ETH_P_IP): + nhlen = sizeof(struct iphdr); + break; + default: + nhlen = 0; + } + + return pskb_network_may_pull(skb, nhlen); +} + static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h @@ -5,17 +5,10 @@ struct nf_conncount_data; -enum nf_conncount_list_add { - NF_CONNCOUNT_ADDED, /* list add was ok */ - NF_CONNCOUNT_ERR, /* -ENOMEM, must drop skb */ - NF_CONNCOUNT_SKIP, /* list is already reclaimed by gc */ -}; - struct nf_conncount_list { spinlock_t list_lock; struct list_head head; /* connections with the same filtering key */ unsigned int count; /* length of list */ - bool dead; }; struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, @@ -29,18 +22,12 @@ unsigned int nf_conncount_count(struct net *net, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone); -void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone, - bool *addit); +int nf_conncount_add(struct net *net, struct nf_conncount_list *list, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone); void nf_conncount_list_init(struct nf_conncount_list *list); -enum nf_conncount_list_add -nf_conncount_add(struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone); - bool nf_conncount_gc_list(struct net *net, struct nf_conncount_list *list); diff --git a/include/net/sock.h b/include/net/sock.h @@ -298,6 +298,7 @@ struct sock_common { * @sk_filter: socket filtering instructions * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received + * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only * @sk_tsflags: SO_TIMESTAMPING socket options * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications @@ -474,6 +475,9 @@ struct sock { const struct cred *sk_peer_cred; long sk_rcvtimeo; ktime_t sk_stamp; +#if BITS_PER_LONG==32 + seqlock_t sk_stamp_seq; +#endif u16 sk_tsflags; u8 sk_shutdown; u32 sk_tskey; @@ -2297,6 +2301,34 @@ static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) atomic_add(segs, &sk->sk_drops); } +static inline ktime_t sock_read_timestamp(struct sock *sk) +{ +#if BITS_PER_LONG==32 + unsigned int seq; + ktime_t kt; + + do { + seq = read_seqbegin(&sk->sk_stamp_seq); + kt = sk->sk_stamp; + } while (read_seqretry(&sk->sk_stamp_seq, seq)); + + return kt; +#else + return sk->sk_stamp; +#endif +} + +static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) +{ +#if BITS_PER_LONG==32 + write_seqlock(&sk->sk_stamp_seq); + sk->sk_stamp = kt; + write_sequnlock(&sk->sk_stamp_seq); +#else + sk->sk_stamp = kt; +#endif +} + void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, @@ -2321,7 +2353,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); else - sk->sk_stamp = kt; + sock_write_timestamp(sk, kt); if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid) __sock_recv_wifi_status(msg, sk, skb); @@ -2342,9 +2374,9 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) __sock_recv_ts_and_drops(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) - sk->sk_stamp = skb->tstamp; + sock_write_timestamp(sk, skb->tstamp); else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) - sk->sk_stamp = 0; + sock_write_timestamp(sk, 0); } void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c @@ -54,6 +54,7 @@ #define DST regs[insn->dst_reg] #define SRC regs[insn->src_reg] #define FP regs[BPF_REG_FP] +#define AX regs[BPF_REG_AX] #define ARG1 regs[BPF_REG_ARG1] #define CTX regs[BPF_REG_CTX] #define IMM insn->imm @@ -857,6 +858,26 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); + /* Constraints on AX register: + * + * AX register is inaccessible from user space. It is mapped in + * all JITs, and used here for constant blinding rewrites. It is + * typically "stateless" meaning its contents are only valid within + * the executed instruction, but not across several instructions. + * There are a few exceptions however which are further detailed + * below. + * + * Constant blinding is only used by JITs, not in the interpreter. + * The interpreter uses AX in some occasions as a local temporary + * register e.g. in DIV or MOD instructions. + * + * In restricted circumstances, the verifier can also use the AX + * register for rewrites as long as they do not interfere with + * the above cases! + */ + if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX) + goto out; + if (from->imm == 0 && (from->code == (BPF_ALU | BPF_MOV | BPF_K) || from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { @@ -1188,7 +1209,6 @@ bool bpf_opcode_in_insntable(u8 code) */ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) { - u64 tmp; #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z static const void *jumptable[256] = { @@ -1268,36 +1288,36 @@ select_insn: (*(s64 *) &DST) >>= IMM; CONT; ALU64_MOD_X: - div64_u64_rem(DST, SRC, &tmp); - DST = tmp; + div64_u64_rem(DST, SRC, &AX); + DST = AX; CONT; ALU_MOD_X: - tmp = (u32) DST; - DST = do_div(tmp, (u32) SRC); + AX = (u32) DST; + DST = do_div(AX, (u32) SRC); CONT; ALU64_MOD_K: - div64_u64_rem(DST, IMM, &tmp); - DST = tmp; + div64_u64_rem(DST, IMM, &AX); + DST = AX; CONT; ALU_MOD_K: - tmp = (u32) DST; - DST = do_div(tmp, (u32) IMM); + AX = (u32) DST; + DST = do_div(AX, (u32) IMM); CONT; ALU64_DIV_X: DST = div64_u64(DST, SRC); CONT; ALU_DIV_X: - tmp = (u32) DST; - do_div(tmp, (u32) SRC); - DST = (u32) tmp; + AX = (u32) DST; + do_div(AX, (u32) SRC); + DST = (u32) AX; CONT; ALU64_DIV_K: DST = div64_u64(DST, IMM); CONT; ALU_DIV_K: - tmp = (u32) DST; - do_div(tmp, (u32) IMM); - DST = (u32) tmp; + AX = (u32) DST; + do_div(AX, (u32) IMM); + DST = (u32) AX; CONT; ALU_END_TO_BE: switch (IMM) { @@ -1553,7 +1573,7 @@ STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \ { \ u64 stack[stack_size / sizeof(u64)]; \ - u64 regs[MAX_BPF_REG]; \ + u64 regs[MAX_BPF_EXT_REG]; \ \ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ ARG1 = (u64) (unsigned long) ctx; \ @@ -1566,7 +1586,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \ const struct bpf_insn *insn) \ { \ u64 stack[stack_size / sizeof(u64)]; \ - u64 regs[MAX_BPF_REG]; \ + u64 regs[MAX_BPF_EXT_REG]; \ \ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ BPF_R1 = r1; \ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c @@ -710,6 +710,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, free_func_state(dst_state->frame[i]); dst_state->frame[i] = NULL; } + dst_state->speculative = src->speculative; dst_state->curframe = src->curframe; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; @@ -754,7 +755,8 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, } static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, - int insn_idx, int prev_insn_idx) + int insn_idx, int prev_insn_idx, + bool speculative) { struct bpf_verifier_state *cur = env->cur_state; struct bpf_verifier_stack_elem *elem; @@ -772,6 +774,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, err = copy_verifier_state(&elem->st, cur); if (err) goto err; + elem->st.speculative |= speculative; if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { verbose(env, "BPF program is too complex\n"); goto err; @@ -1387,6 +1390,31 @@ static int check_stack_read(struct bpf_verifier_env *env, } } +static int check_stack_access(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + int off, int size) +{ + /* Stack accesses must be at a fixed offset, so that we + * can determine what type of data were returned. See + * check_stack_read(). + */ + if (!tnum_is_const(reg->var_off)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "variable stack access var_off=%s off=%d size=%d", + tn_buf, off, size); + return -EACCES; + } + + if (off >= 0 || off < -MAX_BPF_STACK) { + verbose(env, "invalid stack off=%d size=%d\n", off, size); + return -EACCES; + } + + return 0; +} + /* check read/write into map element returned by bpf_map_lookup_elem() */ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size, bool zero_size_allowed) @@ -1418,13 +1446,17 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, */ if (env->log.level) print_verifier_state(env, state); + /* The minimum value is only important with signed * comparisons where we can't assume the floor of a * value is 0. If we are using signed variables for our * index'es we need to make sure that whatever we use * will have a set floor within our range. */ - if (reg->smin_value < 0) { + if (reg->smin_value < 0 && + (reg->smin_value == S64_MIN || + (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) || + reg->smin_value + off < 0)) { verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno); return -EACCES; @@ -1954,24 +1986,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } } else if (reg->type == PTR_TO_STACK) { - /* stack accesses must be at a fixed offset, so that we can - * determine what type of data were returned. - * See check_stack_read(). - */ - if (!tnum_is_const(reg->var_off)) { - char tn_buf[48]; - - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "variable stack access var_off=%s off=%d size=%d", - tn_buf, off, size); - return -EACCES; - } off += reg->var_off.value; - if (off >= 0 || off < -MAX_BPF_STACK) { - verbose(env, "invalid stack off=%d size=%d\n", off, - size); - return -EACCES; - } + err = check_stack_access(env, reg, off, size); + if (err) + return err; state = func(env, reg); err = update_stack_depth(env, state, off); @@ -3052,6 +3070,102 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env, return true; } +static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) +{ + return &env->insn_aux_data[env->insn_idx]; +} + +static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, + u32 *ptr_limit, u8 opcode, bool off_is_neg) +{ + bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || + (opcode == BPF_SUB && !off_is_neg); + u32 off; + + switch (ptr_reg->type) { + case PTR_TO_STACK: + off = ptr_reg->off + ptr_reg->var_off.value; + if (mask_to_left) + *ptr_limit = MAX_BPF_STACK + off; + else + *ptr_limit = -off; + return 0; + case PTR_TO_MAP_VALUE: + if (mask_to_left) { + *ptr_limit = ptr_reg->umax_value + ptr_reg->off; + } else { + off = ptr_reg->smin_value + ptr_reg->off; + *ptr_limit = ptr_reg->map_ptr->value_size - off; + } + return 0; + default: + return -EINVAL; + } +} + +static int sanitize_ptr_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn, + const struct bpf_reg_state *ptr_reg, + struct bpf_reg_state *dst_reg, + bool off_is_neg) +{ + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_insn_aux_data *aux = cur_aux(env); + bool ptr_is_dst_reg = ptr_reg == dst_reg; + u8 opcode = BPF_OP(insn->code); + u32 alu_state, alu_limit; + struct bpf_reg_state tmp; + bool ret; + + if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K) + return 0; + + /* We already marked aux for masking from non-speculative + * paths, thus we got here in the first place. We only care + * to explore bad access from here. + */ + if (vstate->speculative) + goto do_sim; + + alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; + alu_state |= ptr_is_dst_reg ? + BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; + + if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) + return 0; + + /* If we arrived here from different branches with different + * limits to sanitize, then this won't work. + */ + if (aux->alu_state && + (aux->alu_state != alu_state || + aux->alu_limit != alu_limit)) + return -EACCES; + + /* Corresponding fixup done in fixup_bpf_calls(). */ + aux->alu_state = alu_state; + aux->alu_limit = alu_limit; + +do_sim: + /* Simulate and find potential out-of-bounds access under + * speculative execution from truncation as a result of + * masking when off was not within expected range. If off + * sits in dst, then we temporarily need to move ptr there + * to simulate dst (== 0) +/-= ptr. Needed, for example, + * for cases where we use K-based arithmetic in one direction + * and truncated reg-based in the other in order to explore + * bad access. + */ + if (!ptr_is_dst_reg) { + tmp = *dst_reg; + *dst_reg = *ptr_reg; + } + ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); + if (!ptr_is_dst_reg) + *dst_reg = tmp; + return !ret ? -EFAULT : 0; +} + /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. * Caller should also handle BPF_MOV case separately. * If we return -EACCES, caller may want to try again treating pointer as a @@ -3070,8 +3184,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; + u32 dst = insn->dst_reg, src = insn->src_reg; u8 opcode = BPF_OP(insn->code); - u32 dst = insn->dst_reg; + int ret; dst_reg = &regs[dst]; @@ -3104,6 +3219,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; + case PTR_TO_MAP_VALUE: + if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { + verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", + off_reg == dst_reg ? dst : src); + return -EACCES; + } + /* fall-through */ default: break; } @@ -3120,6 +3242,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, switch (opcode) { case BPF_ADD: + ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); + if (ret < 0) { + verbose(env, "R%d tried to add from different maps or paths\n", dst); + return ret; + } /* We can take a fixed offset as long as it doesn't overflow * the s32 'off' field */ @@ -3170,6 +3297,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } break; case BPF_SUB: + ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); + if (ret < 0) { + verbose(env, "R%d tried to sub from different maps or paths\n", dst); + return ret; + } if (dst_reg == off_reg) { /* scalar -= pointer. Creates an unknown scalar */ verbose(env, "R%d tried to subtract pointer from scalar\n", @@ -3249,6 +3381,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, __update_reg_bounds(dst_reg); __reg_deduce_bounds(dst_reg); __reg_bound_offset(dst_reg); + + /* For unprivileged we require that resulting offset must be in bounds + * in order to be able to sanitize access later on. + */ + if (!env->allow_ptr_leaks) { + if (dst_reg->type == PTR_TO_MAP_VALUE && + check_map_access(env, dst, dst_reg->off, 1, false)) { + verbose(env, "R%d pointer arithmetic of map value goes out of range, " + "prohibited for !root\n", dst); + return -EACCES; + } else if (dst_reg->type == PTR_TO_STACK && + check_stack_access(env, dst_reg, dst_reg->off + + dst_reg->var_off.value, 1)) { + verbose(env, "R%d stack pointer arithmetic goes out of range, " + "prohibited for !root\n", dst); + return -EACCES; + } + } + return 0; } @@ -4348,7 +4499,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } } - other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx); + other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, + false); if (!other_branch) return -EFAULT; other_branch_regs = other_branch->frame[other_branch->curframe]->regs; @@ -5458,6 +5610,12 @@ static bool states_equal(struct bpf_verifier_env *env, if (old->curframe != cur->curframe) return false; + /* Verification state from speculative execution simulation + * must never prune a non-speculative execution one. + */ + if (old->speculative && !cur->speculative) + return false; + /* for states to be equal callsites have to be the same * and all frame states need to be equivalent */ @@ -5650,7 +5808,6 @@ static int do_check(struct bpf_verifier_env *env) struct bpf_insn *insns = env->prog->insnsi; struct bpf_reg_state *regs; int insn_cnt = env->prog->len, i; - int insn_idx, prev_insn_idx = 0; int insn_processed = 0; bool do_print_state = false; @@ -5660,6 +5817,7 @@ static int do_check(struct bpf_verifier_env *env) if (!state) return -ENOMEM; state->curframe = 0; + state->speculative = false; state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); if (!state->frame[0]) { kfree(state); @@ -5670,19 +5828,19 @@ static int do_check(struct bpf_verifier_env *env) BPF_MAIN_FUNC /* callsite */, 0 /* frameno */, 0 /* subprogno, zero == main subprog */); - insn_idx = 0; + for (;;) { struct bpf_insn *insn; u8 class; int err; - if (insn_idx >= insn_cnt) { + if (env->insn_idx >= insn_cnt) { verbose(env, "invalid insn idx %d insn_cnt %d\n", - insn_idx, insn_cnt); + env->insn_idx, insn_cnt); return -EFAULT; } - insn = &insns[insn_idx]; + insn = &insns[env->insn_idx]; class = BPF_CLASS(insn->code); if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { @@ -5692,17 +5850,19 @@ static int do_check(struct bpf_verifier_env *env) return -E2BIG; } - err = is_state_visited(env, insn_idx); + err = is_state_visited(env, env->insn_idx); if (err < 0) return err; if (err == 1) { /* found equivalent state, can prune the search */ if (env->log.level) { if (do_print_state) - verbose(env, "\nfrom %d to %d: safe\n", - prev_insn_idx, insn_idx); + verbose(env, "\nfrom %d to %d%s: safe\n", + env->prev_insn_idx, env->insn_idx, + env->cur_state->speculative ? + " (speculative execution)" : ""); else - verbose(env, "%d: safe\n", insn_idx); + verbose(env, "%d: safe\n", env->insn_idx); } goto process_bpf_exit; } @@ -5715,10 +5875,12 @@ static int do_check(struct bpf_verifier_env *env) if (env->log.level > 1 || (env->log.level && do_print_state)) { if (env->log.level > 1) - verbose(env, "%d:", insn_idx); + verbose(env, "%d:", env->insn_idx); else - verbose(env, "\nfrom %d to %d:", - prev_insn_idx, insn_idx); + verbose(env, "\nfrom %d to %d%s:", + env->prev_insn_idx, env->insn_idx, + env->cur_state->speculative ? + " (speculative execution)" : ""); print_verifier_state(env, state->frame[state->curframe]); do_print_state = false; } @@ -5729,20 +5891,20 @@ static int do_check(struct bpf_verifier_env *env) .private_data = env, }; - verbose_linfo(env, insn_idx, "; "); - verbose(env, "%d: ", insn_idx); + verbose_linfo(env, env->insn_idx, "; "); + verbose(env, "%d: ", env->insn_idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); } if (bpf_prog_is_dev_bound(env->prog->aux)) { - err = bpf_prog_offload_verify_insn(env, insn_idx, - prev_insn_idx); + err = bpf_prog_offload_verify_insn(env, env->insn_idx, + env->prev_insn_idx); if (err) return err; } regs = cur_regs(env); - env->insn_aux_data[insn_idx].seen = true; + env->insn_aux_data[env->insn_idx].seen = true; if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); @@ -5768,13 +5930,13 @@ static int do_check(struct bpf_verifier_env *env) /* check that memory (src_reg + off) is readable, * the state of dst_reg will be updated by this func */ - err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, - insn->dst_reg, false); + err = check_mem_access(env, env->insn_idx, insn->src_reg, + insn->off, BPF_SIZE(insn->code), + BPF_READ, insn->dst_reg, false); if (err) return err; - prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; + prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type; if (*prev_src_type == NOT_INIT) { /* saw a valid insn @@ -5799,10 +5961,10 @@ static int do_check(struct bpf_verifier_env *env) enum bpf_reg_type *prev_dst_type, dst_reg_type; if (BPF_MODE(insn->code) == BPF_XADD) { - err = check_xadd(env, insn_idx, insn); + err = check_xadd(env, env->insn_idx, insn); if (err) return err; - insn_idx++; + env->insn_idx++; continue; } @@ -5818,13 +5980,13 @@ static int do_check(struct bpf_verifier_env *env) dst_reg_type = regs[insn->dst_reg].type; /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_WRITE, - insn->src_reg, false); + err = check_mem_access(env, env->insn_idx, insn->dst_reg, + insn->off, BPF_SIZE(insn->code), + BPF_WRITE, insn->src_reg, false); if (err) return err; - prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type; + prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type; if (*prev_dst_type == NOT_INIT) { *prev_dst_type = dst_reg_type; @@ -5852,9 +6014,9 @@ static int do_check(struct bpf_verifier_env *env) } /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_WRITE, - -1, false); + err = check_mem_access(env, env->insn_idx, insn->dst_reg, + insn->off, BPF_SIZE(insn->code), + BPF_WRITE, -1, false); if (err) return err; @@ -5872,9 +6034,9 @@ static int do_check(struct bpf_verifier_env *env) } if (insn->src_reg == BPF_PSEUDO_CALL) - err = check_func_call(env, insn, &insn_idx); + err = check_func_call(env, insn, &env->insn_idx); else - err = check_helper_call(env, insn->imm, insn_idx); + err = check_helper_call(env, insn->imm, env->insn_idx); if (err) return err; @@ -5887,7 +6049,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - insn_idx += insn->off + 1; + env->insn_idx += insn->off + 1; continue; } else if (opcode == BPF_EXIT) { @@ -5901,8 +6063,8 @@ static int do_check(struct bpf_verifier_env *env) if (state->curframe) { /* exit from nested function */ - prev_insn_idx = insn_idx; - err = prepare_func_exit(env, &insn_idx); + env->prev_insn_idx = env->insn_idx; + err = prepare_func_exit(env, &env->insn_idx); if (err) return err; do_print_state = true; @@ -5932,7 +6094,8 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; process_bpf_exit: - err = pop_stack(env, &prev_insn_idx, &insn_idx); + err = pop_stack(env, &env->prev_insn_idx, + &env->insn_idx); if (err < 0) { if (err != -ENOENT) return err; @@ -5942,7 +6105,7 @@ process_bpf_exit: continue; } } else { - err = check_cond_jmp_op(env, insn, &insn_idx); + err = check_cond_jmp_op(env, insn, &env->insn_idx); if (err) return err; } @@ -5959,8 +6122,8 @@ process_bpf_exit: if (err) return err; - insn_idx++; - env->insn_aux_data[insn_idx].seen = true; + env->insn_idx++; + env->insn_aux_data[env->insn_idx].seen = true; } else { verbose(env, "invalid BPF_LD mode\n"); return -EINVAL; @@ -5970,7 +6133,7 @@ process_bpf_exit: return -EINVAL; } - insn_idx++; + env->insn_idx++; } verbose(env, "processed %d insns (limit %d), stack depth ", @@ -6709,6 +6872,57 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) continue; } + if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || + insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { + const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; + const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; + struct bpf_insn insn_buf[16]; + struct bpf_insn *patch = &insn_buf[0]; + bool issrc, isneg; + u32 off_reg; + + aux = &env->insn_aux_data[i + delta]; + if (!aux->alu_state) + continue; + + isneg = aux->alu_state & BPF_ALU_NEG_VALUE; + issrc = (aux->alu_state & BPF_ALU_SANITIZE) == + BPF_ALU_SANITIZE_SRC; + + off_reg = issrc ? insn->src_reg : insn->dst_reg; + if (isneg) + *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); + *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); + *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); + *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); + *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); + *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); + if (issrc) { + *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, + off_reg); + insn->src_reg = BPF_REG_AX; + } else { + *patch++ = BPF_ALU64_REG(BPF_AND, off_reg, + BPF_REG_AX); + } + if (isneg) + insn->code = insn->code == code_add ? + code_sub : code_add; + *patch++ = *insn; + if (issrc && isneg) + *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); + cnt = patch - insn_buf; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (insn->code != (BPF_JMP | BPF_CALL)) continue; if (insn->src_reg == BPF_PSEUDO_CALL) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c @@ -653,15 +653,22 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } - dev = dev_get_by_name(&init_net, devname); + rtnl_lock(); + dev = __dev_get_by_name(&init_net, devname); if (!dev) { + rtnl_unlock(); res = -ENODEV; break; } ax25->ax25_dev = ax25_dev_ax25dev(dev); + if (!ax25->ax25_dev) { + rtnl_unlock(); + res = -ENODEV; + break; + } ax25_fillin_cb(ax25, ax25->ax25_dev); - dev_put(dev); + rtnl_unlock(); break; default: diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c @@ -116,6 +116,7 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; spin_unlock_bh(&ax25_dev_lock); + dev->ax25_ptr = NULL; dev_put(dev); kfree(ax25_dev); return; @@ -125,6 +126,7 @@ void ax25_dev_device_down(struct net_device *dev) if (s->next == ax25_dev) { s->next = ax25_dev->next; spin_unlock_bh(&ax25_dev_lock); + dev->ax25_ptr = NULL; dev_put(dev); kfree(ax25_dev); return; diff --git a/net/compat.c b/net/compat.c @@ -467,12 +467,14 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) ctv = (struct compat_timeval __user *) userstamp; err = -ENOENT; sock_enable_timestamp(sk, SOCK_TIMESTAMP); - tv = ktime_to_timeval(sk->sk_stamp); + tv = ktime_to_timeval(sock_read_timestamp(sk)); + if (tv.tv_sec == -1) return err; if (tv.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - tv = ktime_to_timeval(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + tv = ktime_to_timeval(kt); } err = 0; if (put_user(tv.tv_sec, &ctv->tv_sec) || @@ -494,12 +496,13 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta ctv = (struct compat_timespec __user *) userstamp; err = -ENOENT; sock_enable_timestamp(sk, SOCK_TIMESTAMP); - ts = ktime_to_timespec(sk->sk_stamp); + ts = ktime_to_timespec(sock_read_timestamp(sk)); if (ts.tv_sec == -1) return err; if (ts.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - ts = ktime_to_timespec(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + ts = ktime_to_timespec(kt); } err = 0; if (put_user(ts.tv_sec, &ctv->tv_sec) || diff --git a/net/core/ethtool.c b/net/core/ethtool.c @@ -793,8 +793,13 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, if (rc >= 0) info.n_priv_flags = rc; } - if (ops->get_regs_len) - info.regdump_len = ops->get_regs_len(dev); + if (ops->get_regs_len) { + int ret = ops->get_regs_len(dev); + + if (ret > 0) + info.regdump_len = ret; + } + if (ops->get_eeprom_len) info.eedump_len = ops->get_eeprom_len(dev); @@ -1337,6 +1342,9 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) return -EFAULT; reglen = ops->get_regs_len(dev); + if (reglen <= 0) + return reglen; + if (regs.len > reglen) regs.len = reglen; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c @@ -4104,6 +4104,11 @@ static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (!addr) { + NL_SET_ERR_MSG(extack, "Missing lookup address for fdb get request"); + return -EINVAL; + } + if (brport_idx) { dev = __dev_get_by_index(net, brport_idx); if (!dev) { diff --git a/net/core/sock.c b/net/core/sock.c @@ -2751,6 +2751,9 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_stamp = SK_DEFAULT_STAMP; +#if BITS_PER_LONG==32 + seqlock_init(&sk->sk_stamp_seq); +#endif atomic_set(&sk->sk_zckey, 0); #ifdef CONFIG_NET_RX_BUSY_POLL @@ -2850,12 +2853,13 @@ int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) struct timeval tv; sock_enable_timestamp(sk, SOCK_TIMESTAMP); - tv = ktime_to_timeval(sk->sk_stamp); + tv = ktime_to_timeval(sock_read_timestamp(sk)); if (tv.tv_sec == -1) return -ENOENT; if (tv.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - tv = ktime_to_timeval(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + tv = ktime_to_timeval(kt); } return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0; } @@ -2866,11 +2870,12 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) struct timespec ts; sock_enable_timestamp(sk, SOCK_TIMESTAMP); - ts = ktime_to_timespec(sk->sk_stamp); + ts = ktime_to_timespec(sock_read_timestamp(sk)); if (ts.tv_sec == -1) return -ENOENT; if (ts.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); ts = ktime_to_timespec(sk->sk_stamp); } return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c @@ -198,11 +198,15 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) static struct fib_table *fib_empty_table(struct net *net) { - u32 id; + u32 id = 1; - for (id = 1; id <= RT_TABLE_MAX; id++) + while (1) { if (!fib_get_table(net, id)) return fib_new_table(net, id); + + if (id++ == RT_TABLE_MAX) + break; + } return NULL; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c @@ -676,6 +676,9 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tnl_params; + if (!pskb_inet_may_pull(skb)) + goto free_skb; + if (tunnel->collect_md) { gre_fb_xmit(skb, dev, skb->protocol); return NETDEV_TX_OK; @@ -719,6 +722,9 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); bool truncate = false; + if (!pskb_inet_may_pull(skb)) + goto free_skb; + if (tunnel->collect_md) { erspan_fb_xmit(skb, dev, skb->protocol); return NETDEV_TX_OK; @@ -762,6 +768,9 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, { struct ip_tunnel *tunnel = netdev_priv(dev); + if (!pskb_inet_may_pull(skb)) + goto free_skb; + if (tunnel->collect_md) { gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); return NETDEV_TX_OK; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c @@ -627,7 +627,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); - unsigned int inner_nhdr_len = 0; const struct iphdr *inner_iph; struct flowi4 fl4; u8 tos, ttl; @@ -637,14 +636,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, __be32 dst; bool connected; - /* ensure we can access the inner net header, for several users below */ - if (skb->protocol == htons(ETH_P_IP)) - inner_nhdr_len = sizeof(struct iphdr); - else if (skb->protocol == htons(ETH_P_IPV6)) - inner_nhdr_len = sizeof(struct ipv6hdr); - if (unlikely(!pskb_may_pull(skb, inner_nhdr_len))) - goto tx_error; - inner_iph = (const struct iphdr *)skb_inner_network_header(skb); connected = (tunnel->parms.iph.daddr != 0); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c @@ -241,6 +241,9 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); struct flowi fl; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + memset(&fl, 0, sizeof(fl)); switch (skb->protocol) { @@ -253,15 +256,18 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); break; default: - dev->stats.tx_errors++; - dev_kfree_skb(skb); - return NETDEV_TX_OK; + goto tx_err; } /* override mark with tunnel output key */ fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key); return vti_xmit(skb, dev, &fl); + +tx_err: + dev->stats.tx_errors++; + kfree_skb(skb); + return NETDEV_TX_OK; } static int vti4_err(struct sk_buff *skb, u32 info) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c @@ -4736,8 +4736,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC; idev = ipv6_find_idev(dev); - if (IS_ERR(idev)) - return PTR_ERR(idev); + if (!idev) + return -ENOBUFS; if (!ipv6_allow_optimistic_dad(net, idev)) cfg.ifa_flags &= ~IFA_F_OPTIMISTIC; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c @@ -350,6 +350,9 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, err = -EINVAL; goto out_unlock; } + } + + if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c @@ -627,7 +627,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) return -ENOENT; } - res = fib6_dump_table(tb, skb, cb); + if (!cb->args[0]) { + res = fib6_dump_table(tb, skb, cb); + if (!res) + cb->args[0] = 1; + } goto out; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c @@ -881,6 +881,9 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, struct net_device_stats *stats = &t->dev->stats; int ret; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) goto tx_err; @@ -923,6 +926,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, int nhoff; int thoff; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) goto tx_err; @@ -995,8 +1001,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, goto tx_err; } } else { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); - switch (skb->protocol) { case htons(ETH_P_IP): memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1004,7 +1008,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, &dsfield, &encap_limit); break; case htons(ETH_P_IPV6): - if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) + if (ipv6_addr_equal(&t->parms.raddr, &ipv6_hdr(skb)->saddr)) goto tx_err; if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit)) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c @@ -1243,10 +1243,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; - /* ensure we can access the full inner ip header */ - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return -1; - iph = ip_hdr(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1321,9 +1317,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - return -1; - ipv6h = ipv6_hdr(skb); tproto = READ_ONCE(t->parms.proto); if ((tproto != IPPROTO_IPV6 && tproto != 0) || @@ -1405,6 +1398,9 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device_stats *stats = &t->dev->stats; int ret; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + switch (skb->protocol) { case htons(ETH_P_IP): ret = ip4ip6_tnl_xmit(skb, dev); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c @@ -522,18 +522,18 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; - struct ipv6hdr *ipv6h; struct flowi fl; int ret; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + memset(&fl, 0, sizeof(fl)); switch (skb->protocol) { case htons(ETH_P_IPV6): - ipv6h = ipv6_hdr(skb); - if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || - vti6_addr_conflict(t, ipv6h)) + vti6_addr_conflict(t, ipv6_hdr(skb))) goto tx_err; xfrm_decode_session(skb, &fl, AF_INET6); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c @@ -51,6 +51,7 @@ #include <linux/export.h> #include <net/ip6_checksum.h> #include <linux/netconf.h> +#include <net/ip_tunnels.h> #include <linux/nospec.h> @@ -599,13 +600,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_mark = skb->mark, }; - int err; - err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) { - kfree_skb(skb); - return err; - } + if (!pskb_inet_may_pull(skb)) + goto tx_err; + + if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) + goto tx_err; read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; @@ -614,6 +614,11 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, read_unlock(&mrt_lock); kfree_skb(skb); return NETDEV_TX_OK; + +tx_err: + dev->stats.tx_errors++; + kfree_skb(skb); + return NETDEV_TX_OK; } static int reg_vif_get_iflink(const struct net_device *dev) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c @@ -210,7 +210,7 @@ found: if (next && next->ip_defrag_offset < end) goto discard_fq; - /* Note : skb->ip_defrag_offset and skb->dev share the same location */ + /* Note : skb->ip_defrag_offset and skb->sk share the same location */ dev = skb->dev; if (dev) fq->iif = dev->ifindex; diff --git a/net/ipv6/route.c b/net/ipv6/route.c @@ -210,7 +210,9 @@ struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, n = __ipv6_neigh_lookup(dev, daddr); if (n) return n; - return neigh_create(&nd_tbl, daddr, dev); + + n = neigh_create(&nd_tbl, daddr, dev); + return IS_ERR(n) ? NULL : n; } static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, @@ -5054,12 +5056,16 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, { struct net *net; int delay; + int ret; if (!write) return -EINVAL; net = (struct net *)ctl->extra1; delay = net->ipv6.sysctl.flush_delay; - proc_dointvec(ctl, write, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + if (ret) + return ret; + fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c @@ -1021,6 +1021,9 @@ tx_error: static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { + if (!pskb_inet_may_pull(skb)) + goto tx_err; + switch (skb->protocol) { case htons(ETH_P_IP): sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP); diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c @@ -33,12 +33,6 @@ #define CONNCOUNT_SLOTS 256U -#ifdef CONFIG_LOCKDEP -#define CONNCOUNT_LOCK_SLOTS 8U -#else -#define CONNCOUNT_LOCK_SLOTS 256U -#endif - #define CONNCOUNT_GC_MAX_NODES 8 #define MAX_KEYLEN 5 @@ -49,8 +43,6 @@ struct nf_conncount_tuple { struct nf_conntrack_zone zone; int cpu; u32 jiffies32; - bool dead; - struct rcu_head rcu_head; }; struct nf_conncount_rb { @@ -60,7 +52,7 @@ struct nf_conncount_rb { struct rcu_head rcu_head; }; -static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp; +static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp; struct nf_conncount_data { unsigned int keylen; @@ -89,79 +81,25 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen) return memcmp(a, b, klen * sizeof(u32)); } -enum nf_conncount_list_add -nf_conncount_add(struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone) -{ - struct nf_conncount_tuple *conn; - - if (WARN_ON_ONCE(list->count > INT_MAX)) - return NF_CONNCOUNT_ERR; - - conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); - if (conn == NULL) - return NF_CONNCOUNT_ERR; - - conn->tuple = *tuple; - conn->zone = *zone; - conn->cpu = raw_smp_processor_id(); - conn->jiffies32 = (u32)jiffies; - conn->dead = false; - spin_lock_bh(&list->list_lock); - if (list->dead == true) { - kmem_cache_free(conncount_conn_cachep, conn); - spin_unlock_bh(&list->list_lock); - return NF_CONNCOUNT_SKIP; - } - list_add_tail(&conn->node, &list->head); - list->count++; - spin_unlock_bh(&list->list_lock); - return NF_CONNCOUNT_ADDED; -} -EXPORT_SYMBOL_GPL(nf_conncount_add); - -static void __conn_free(struct rcu_head *h) -{ - struct nf_conncount_tuple *conn; - - conn = container_of(h, struct nf_conncount_tuple, rcu_head); - kmem_cache_free(conncount_conn_cachep, conn); -} - -static bool conn_free(struct nf_conncount_list *list, +static void conn_free(struct nf_conncount_list *list, struct nf_conncount_tuple *conn) { - bool free_entry = false; - - spin_lock_bh(&list->list_lock); - - if (conn->dead) { - spin_unlock_bh(&list->list_lock); - return free_entry; - } + lockdep_assert_held(&list->list_lock); list->count--; - conn->dead = true; - list_del_rcu(&conn->node); - if (list->count == 0) { - list->dead = true; - free_entry = true; - } + list_del(&conn->node); - spin_unlock_bh(&list->list_lock); - call_rcu(&conn->rcu_head, __conn_free); - return free_entry; + kmem_cache_free(conncount_conn_cachep, conn); } static const struct nf_conntrack_tuple_hash * find_or_evict(struct net *net, struct nf_conncount_list *list, - struct nf_conncount_tuple *conn, bool *free_entry) + struct nf_conncount_tuple *conn) { const struct nf_conntrack_tuple_hash *found; unsigned long a, b; int cpu = raw_smp_processor_id(); - __s32 age; + u32 age; found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); if (found) @@ -176,52 +114,45 @@ find_or_evict(struct net *net, struct nf_conncount_list *list, */ age = a - b; if (conn->cpu == cpu || age >= 2) { - *free_entry = conn_free(list, conn); + conn_free(list, conn); return ERR_PTR(-ENOENT); } return ERR_PTR(-EAGAIN); } -void nf_conncount_lookup(struct net *net, - struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone, - bool *addit) +static int __nf_conncount_add(struct net *net, + struct nf_conncount_list *list, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) { const struct nf_conntrack_tuple_hash *found; struct nf_conncount_tuple *conn, *conn_n; struct nf_conn *found_ct; unsigned int collect = 0; - bool free_entry = false; - - /* best effort only */ - *addit = tuple ? true : false; /* check the saved connections */ list_for_each_entry_safe(conn, conn_n, &list->head, node) { if (collect > CONNCOUNT_GC_MAX_NODES) break; - found = find_or_evict(net, list, conn, &free_entry); + found = find_or_evict(net, list, conn); if (IS_ERR(found)) { /* Not found, but might be about to be confirmed */ if (PTR_ERR(found) == -EAGAIN) { - if (!tuple) - continue; - if (nf_ct_tuple_equal(&conn->tuple, tuple) && nf_ct_zone_id(&conn->zone, conn->zone.dir) == nf_ct_zone_id(zone, zone->dir)) - *addit = false; - } else if (PTR_ERR(found) == -ENOENT) + return 0; /* already exists */ + } else { collect++; + } continue; } found_ct = nf_ct_tuplehash_to_ctrack(found); - if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) && + if (nf_ct_tuple_equal(&conn->tuple, tuple) && nf_ct_zone_equal(found_ct, zone, zone->dir)) { /* * We should not see tuples twice unless someone hooks @@ -229,7 +160,8 @@ void nf_conncount_lookup(struct net *net, * * Attempt to avoid a re-add in this case. */ - *addit = false; + nf_ct_put(found_ct); + return 0; } else if (already_closed(found_ct)) { /* * we do not care about connections which are @@ -243,19 +175,48 @@ void nf_conncount_lookup(struct net *net, nf_ct_put(found_ct); } + + if (WARN_ON_ONCE(list->count > INT_MAX)) + return -EOVERFLOW; + + conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); + if (conn == NULL) + return -ENOMEM; + + conn->tuple = *tuple; + conn->zone = *zone; + conn->cpu = raw_smp_processor_id(); + conn->jiffies32 = (u32)jiffies; + list_add_tail(&conn->node, &list->head); + list->count++; + return 0; } -EXPORT_SYMBOL_GPL(nf_conncount_lookup); + +int nf_conncount_add(struct net *net, + struct nf_conncount_list *list, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) +{ + int ret; + + /* check the saved connections */ + spin_lock_bh(&list->list_lock); + ret = __nf_conncount_add(net, list, tuple, zone); + spin_unlock_bh(&list->list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_conncount_add); void nf_conncount_list_init(struct nf_conncount_list *list) { spin_lock_init(&list->list_lock); INIT_LIST_HEAD(&list->head); list->count = 0; - list->dead = false; } EXPORT_SYMBOL_GPL(nf_conncount_list_init); -/* Return true if the list is empty */ +/* Return true if the list is empty. Must be called with BH disabled. */ bool nf_conncount_gc_list(struct net *net, struct nf_conncount_list *list) { @@ -263,17 +224,17 @@ bool nf_conncount_gc_list(struct net *net, struct nf_conncount_tuple *conn, *conn_n; struct nf_conn *found_ct; unsigned int collected = 0; - bool free_entry = false; bool ret = false; + /* don't bother if other cpu is already doing GC */ + if (!spin_trylock(&list->list_lock)) + return false; + list_for_each_entry_safe(conn, conn_n, &list->head, node) { - found = find_or_evict(net, list, conn, &free_entry); + found = find_or_evict(net, list, conn); if (IS_ERR(found)) { - if (PTR_ERR(found) == -ENOENT) { - if (free_entry) - return true; + if (PTR_ERR(found) == -ENOENT) collected++; - } continue; } @@ -284,23 +245,19 @@ bool nf_conncount_gc_list(struct net *net, * closed already -> ditch it */ nf_ct_put(found_ct); - if (conn_free(list, conn)) - return true; + conn_free(list, conn); collected++; continue; } nf_ct_put(found_ct); if (collected > CONNCOUNT_GC_MAX_NODES) - return false; + break; } - spin_lock_bh(&list->list_lock); - if (!list->count) { - list->dead = true; + if (!list->count) ret = true; - } - spin_unlock_bh(&list->list_lock); + spin_unlock(&list->list_lock); return ret; } @@ -314,6 +271,7 @@ static void __tree_nodes_free(struct rcu_head *h) kmem_cache_free(conncount_rb_cachep, rbconn); } +/* caller must hold tree nf_conncount_locks[] lock */ static void tree_nodes_free(struct rb_root *root, struct nf_conncount_rb *gc_nodes[], unsigned int gc_count) @@ -323,8 +281,10 @@ static void tree_nodes_free(struct rb_root *root, while (gc_count) { rbconn = gc_nodes[--gc_count]; spin_lock(&rbconn->list.list_lock); - rb_erase(&rbconn->node, root); - call_rcu(&rbconn->rcu_head, __tree_nodes_free); + if (!rbconn->list.count) { + rb_erase(&rbconn->node, root); + call_rcu(&rbconn->rcu_head, __tree_nodes_free); + } spin_unlock(&rbconn->list.list_lock); } } @@ -341,20 +301,19 @@ insert_tree(struct net *net, struct rb_root *root, unsigned int hash, const u32 *key, - u8 keylen, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone) { - enum nf_conncount_list_add ret; struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES]; struct rb_node **rbnode, *parent; struct nf_conncount_rb *rbconn; struct nf_conncount_tuple *conn; unsigned int count = 0, gc_count = 0; - bool node_found = false; - - spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); + u8 keylen = data->keylen; + bool do_gc = true; + spin_lock_bh(&nf_conncount_locks[hash]); +restart: parent = NULL; rbnode = &(root->rb_node); while (*rbnode) { @@ -368,45 +327,32 @@ insert_tree(struct net *net, } else if (diff > 0) { rbnode = &((*rbnode)->rb_right); } else { - /* unlikely: other cpu added node already */ - node_found = true; - ret = nf_conncount_add(&rbconn->list, tuple, zone); - if (ret == NF_CONNCOUNT_ERR) { + int ret; + + ret = nf_conncount_add(net, &rbconn->list, tuple, zone); + if (ret) count = 0; /* hotdrop */ - } else if (ret == NF_CONNCOUNT_ADDED) { + else count = rbconn->list.count; - } else { - /* NF_CONNCOUNT_SKIP, rbconn is already - * reclaimed by gc, insert a new tree node - */ - node_found = false; - } - break; + tree_nodes_free(root, gc_nodes, gc_count); + goto out_unlock; } if (gc_count >= ARRAY_SIZE(gc_nodes)) continue; - if (nf_conncount_gc_list(net, &rbconn->list)) + if (do_gc && nf_conncount_gc_list(net, &rbconn->list)) gc_nodes[gc_count++] = rbconn; } if (gc_count) { tree_nodes_free(root, gc_nodes, gc_count); - /* tree_node_free before new allocation permits - * allocator to re-use newly free'd object. - * - * This is a rare event; in most cases we will find - * existing node to re-use. (or gc_count is 0). - */ - - if (gc_count >= ARRAY_SIZE(gc_nodes)) - schedule_gc_worker(data, hash); + schedule_gc_worker(data, hash); + gc_count = 0; + do_gc = false; + goto restart; } - if (node_found) - goto out_unlock; - /* expected case: match, insert new node */ rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); if (rbconn == NULL) @@ -430,7 +376,7 @@ insert_tree(struct net *net, rb_link_node_rcu(&rbconn->node, parent, rbnode); rb_insert_color(&rbconn->node, root); out_unlock: - spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); + spin_unlock_bh(&nf_conncount_locks[hash]); return count; } @@ -441,7 +387,6 @@ count_tree(struct net *net, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone) { - enum nf_conncount_list_add ret; struct rb_root *root; struct rb_node *parent; struct nf_conncount_rb *rbconn; @@ -454,7 +399,6 @@ count_tree(struct net *net, parent = rcu_dereference_raw(root->rb_node); while (parent) { int diff; - bool addit; rbconn = rb_entry(parent, struct nf_conncount_rb, node); @@ -464,31 +408,36 @@ count_tree(struct net *net, } else if (diff > 0) { parent = rcu_dereference_raw(parent->rb_right); } else { - /* same source network -> be counted! */ - nf_conncount_lookup(net, &rbconn->list, tuple, zone, - &addit); + int ret; - if (!addit) + if (!tuple) { + nf_conncount_gc_list(net, &rbconn->list); return rbconn->list.count; + } - ret = nf_conncount_add(&rbconn->list, tuple, zone); - if (ret == NF_CONNCOUNT_ERR) { - return 0; /* hotdrop */ - } else if (ret == NF_CONNCOUNT_ADDED) { - return rbconn->list.count; - } else { - /* NF_CONNCOUNT_SKIP, rbconn is already - * reclaimed by gc, insert a new tree node - */ + spin_lock_bh(&rbconn->list.list_lock); + /* Node might be about to be free'd. + * We need to defer to insert_tree() in this case. + */ + if (rbconn->list.count == 0) { + spin_unlock_bh(&rbconn->list.list_lock); break; } + + /* same source network -> be counted! */ + ret = __nf_conncount_add(net, &rbconn->list, tuple, zone); + spin_unlock_bh(&rbconn->list.list_lock); + if (ret) + return 0; /* hotdrop */ + else + return rbconn->list.count; } } if (!tuple) return 0; - return insert_tree(net, data, root, hash, key, keylen, tuple, zone); + return insert_tree(net, data, root, hash, key, tuple, zone); } static void tree_gc_worker(struct work_struct *work) @@ -499,27 +448,47 @@ static void tree_gc_worker(struct work_struct *work) struct rb_node *node; unsigned int tree, next_tree, gc_count = 0; - tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS; + tree = data->gc_tree % CONNCOUNT_SLOTS; root = &data->root[tree]; + local_bh_disable(); rcu_read_lock(); for (node = rb_first(root); node != NULL; node = rb_next(node)) { rbconn = rb_entry(node, struct nf_conncount_rb, node); if (nf_conncount_gc_list(data->net, &rbconn->list)) - gc_nodes[gc_count++] = rbconn; + gc_count++; } rcu_read_unlock(); + local_bh_enable(); + + cond_resched(); spin_lock_bh(&nf_conncount_locks[tree]); + if (gc_count < ARRAY_SIZE(gc_nodes)) + goto next; /* do not bother */ - if (gc_count) { - tree_nodes_free(root, gc_nodes, gc_count); + gc_count = 0; + node = rb_first(root); + while (node != NULL) { + rbconn = rb_entry(node, struct nf_conncount_rb, node); + node = rb_next(node); + + if (rbconn->list.count > 0) + continue; + + gc_nodes[gc_count++] = rbconn; + if (gc_count >= ARRAY_SIZE(gc_nodes)) { + tree_nodes_free(root, gc_nodes, gc_count); + gc_count = 0; + } } + tree_nodes_free(root, gc_nodes, gc_count); +next: clear_bit(tree, data->pending_trees); next_tree = (tree + 1) % CONNCOUNT_SLOTS; - next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS); + next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree); if (next_tree < CONNCOUNT_SLOTS) { data->gc_tree = next_tree; @@ -621,10 +590,7 @@ static int __init nf_conncount_modinit(void) { int i; - BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS); - BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0); - - for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i) + for (i = 0; i < CONNCOUNT_SLOTS; ++i) spin_lock_init(&nf_conncount_locks[i]); conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple", diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c @@ -5727,6 +5727,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); + if (!nest) + goto nla_put_failure; if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority))) goto nla_put_failure; diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c @@ -30,7 +30,6 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int count; - bool addit; tuple_ptr = &tuple; @@ -44,19 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, return; } - nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone, - &addit); - count = priv->list.count; - - if (!addit) - goto out; - - if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) { + if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) { regs->verdict.code = NF_DROP; return; } - count++; -out: + + count = priv->list.count; if ((count > priv->limit) ^ priv->invert) { regs->verdict.code = NFT_BREAK; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c @@ -153,7 +153,7 @@ static struct sock *nr_find_listener(ax25_address *addr) sk_for_each(s, &nr_list) if (!ax25cmp(&nr_sk(s)->source_addr, addr) && s->sk_state == TCP_LISTEN) { - bh_lock_sock(s); + sock_hold(s); goto found; } s = NULL; @@ -174,7 +174,7 @@ static struct sock *nr_find_socket(unsigned char index, unsigned char id) struct nr_sock *nr = nr_sk(s); if (nr->my_index == index && nr->my_id == id) { - bh_lock_sock(s); + sock_hold(s); goto found; } } @@ -198,7 +198,7 @@ static struct sock *nr_find_peer(unsigned char index, unsigned char id, if (nr->your_index == index && nr->your_id == id && !ax25cmp(&nr->dest_addr, dest)) { - bh_lock_sock(s); + sock_hold(s); goto found; } } @@ -224,7 +224,7 @@ static unsigned short nr_find_next_circuit(void) if (i != 0 && j != 0) { if ((sk=nr_find_socket(i, j)) == NULL) break; - bh_unlock_sock(sk); + sock_put(sk); } id++; @@ -920,6 +920,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) } if (sk != NULL) { + bh_lock_sock(sk); skb_reset_transport_header(skb); if (frametype == NR_CONNACK && skb->len == 22) @@ -929,6 +930,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) ret = nr_process_rx_frame(sk, skb); bh_unlock_sock(sk); + sock_put(sk); return ret; } @@ -960,10 +962,12 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) (make = nr_make_new(sk)) == NULL) { nr_transmit_refusal(skb, 0); if (sk) - bh_unlock_sock(sk); + sock_put(sk); return 0; } + bh_lock_sock(sk); + window = skb->data[20]; skb->sk = make; @@ -1016,6 +1020,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) sk->sk_data_ready(sk); bh_unlock_sock(sk); + sock_put(sk); nr_insert_socket(make); diff --git a/net/rds/tcp.c b/net/rds/tcp.c @@ -623,7 +623,7 @@ static void __net_exit rds_tcp_exit_net(struct net *net) if (rtn->rds_tcp_sysctl) unregister_net_sysctl_table(rtn->rds_tcp_sysctl); - if (net != &init_net && rtn->ctl_table) + if (net != &init_net) kfree(rtn->ctl_table); } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c @@ -542,7 +542,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ } - svsk->sk_sk->sk_stamp = skb->tstamp; + sock_write_timestamp(svsk->sk_sk, skb->tstamp); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ len = skb->len; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c @@ -319,7 +319,6 @@ static int tipc_enable_bearer(struct net *net, const char *name, res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { bearer_disable(net, b); - kfree(b); errstr = "failed to create discoverer"; goto rejected; } diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c @@ -904,6 +904,8 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + if (!hdr) + return -EMSGSIZE; nest = nla_nest_start(args, TIPC_NLA_SOCK); if (!nest) { diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c @@ -510,7 +510,7 @@ static void test_devmap(int task, void *data) fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value), 2, 0); if (fd < 0) { - printf("Failed to create arraymap '%s'!\n", strerror(errno)); + printf("Failed to create devmap '%s'!\n", strerror(errno)); exit(1); } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c @@ -23,6 +23,7 @@ #include <stdbool.h> #include <sched.h> #include <limits.h> +#include <assert.h> #include <sys/capability.h> @@ -2577,6 +2578,7 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "invalid stack off=-79992 size=8", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", }, { "PTR_TO_STACK store/load - out of bounds high", @@ -3104,6 +3106,8 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -3206,6 +3210,243 @@ static struct bpf_test tests[] = { .retval_unpriv = 2, }, { + "PTR_TO_STACK check high 1", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK check high 2", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK check high 3", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK check high 4", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "invalid stack off=0 size=1", + .result = REJECT, + }, + { + "PTR_TO_STACK check high 5", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off", + }, + { + "PTR_TO_STACK check high 6", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), + BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off", + }, + { + "PTR_TO_STACK check high 7", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), + BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "fp pointer offset", + }, + { + "PTR_TO_STACK check low 1", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -512), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK check low 2", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513), + BPF_ST_MEM(BPF_B, BPF_REG_1, 1, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 1), + BPF_EXIT_INSN(), + }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK check low 3", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "invalid stack off=-513 size=1", + .result = REJECT, + }, + { + "PTR_TO_STACK check low 4", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, INT_MIN), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "math between fp pointer", + }, + { + "PTR_TO_STACK check low 5", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off", + }, + { + "PTR_TO_STACK check low 6", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), + BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off", + }, + { + "PTR_TO_STACK check low 7", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), + BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "fp pointer offset", + }, + { + "PTR_TO_STACK mixed reg/k, 1", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), + BPF_MOV64_IMM(BPF_REG_2, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK mixed reg/k, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), + BPF_MOV64_IMM(BPF_REG_2, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_5, -6), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK mixed reg/k, 3", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), + BPF_MOV64_IMM(BPF_REG_2, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = -3, + }, + { + "PTR_TO_STACK reg", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_MOV64_IMM(BPF_REG_2, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid stack off=0 size=1", + .result = ACCEPT, + .retval = 42, + }, + { "stack pointer arithmetic", .insns = { BPF_MOV64_IMM(BPF_REG_1, 4), @@ -6610,7 +6851,446 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, { - "map access: known scalar += value_ptr", + "map access: known scalar += value_ptr from different maps", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps", + .retval = 1, + }, + { + "map access: value_ptr -= known scalar from different maps", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 min value is outside of the array range", + .retval = 1, + }, + { + "map access: known scalar += value_ptr from different maps, but same value properties", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: value_ptr += known scalar, upper oob arith, test 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 48), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: value_ptr += known scalar, upper oob arith, test 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 49), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: value_ptr += known scalar, upper oob arith, test 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: value_ptr -= known scalar, lower oob arith, test 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 48), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 min value is outside of the array range", + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + }, + { + "map access: value_ptr -= known scalar, lower oob arith, test 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 48), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: value_ptr -= known scalar, lower oob arith, test 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: known scalar += value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: value_ptr += known scalar, 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: value_ptr += known scalar, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 49), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", + }, + { + "map access: value_ptr += known scalar, 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", + }, + { + "map access: value_ptr += known scalar, 4", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, -2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: value_ptr += known scalar, 5", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, (6 + 1) * sizeof(int)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0xabcdef12, + }, + { + "map access: value_ptr += known scalar, 6", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_1, (3 + 1) * sizeof(int)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 3 * sizeof(int)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0xabcdef12, + }, + { + "map access: unknown scalar += value_ptr, 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: unknown scalar += value_ptr, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0xabcdef12, + }, + { + "map access: unknown scalar += value_ptr, 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 0xabcdef12, + }, + { + "map access: unknown scalar += value_ptr, 4", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -6618,19 +7298,22 @@ static struct bpf_test tests[] = { BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_IMM(BPF_REG_1, 19), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), BPF_EXIT_INSN(), }, .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .retval = 1, + .result = REJECT, + .errstr = "R1 max value is outside of the array range", + .errstr_unpriv = "R1 pointer arithmetic of map value goes out of range", }, { - "map access: value_ptr += known scalar", + "map access: value_ptr += unknown scalar, 1", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -6638,8 +7321,9 @@ static struct bpf_test tests[] = { BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_0, 1), @@ -6650,7 +7334,7 @@ static struct bpf_test tests[] = { .retval = 1, }, { - "map access: unknown scalar += value_ptr", + "map access: value_ptr += unknown scalar, 2", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -6659,19 +7343,18 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .retval = 1, + .retval = 0xabcdef12, }, { - "map access: value_ptr += unknown scalar", + "map access: value_ptr += unknown scalar, 3", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -6679,13 +7362,20 @@ static struct bpf_test tests[] = { BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 16), BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_OR, BPF_REG_3, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_3, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, @@ -6770,6 +7460,8 @@ static struct bpf_test tests[] = { }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -6837,6 +7529,8 @@ static struct bpf_test tests[] = { }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -8376,6 +9070,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -8400,6 +9095,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -8426,6 +9122,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -8451,6 +9148,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -8499,6 +9197,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -8570,6 +9269,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -8621,6 +9321,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -8648,6 +9349,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -8674,6 +9376,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -8703,6 +9406,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R7 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -8733,6 +9437,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 4 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -8761,6 +9466,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, .result_unpriv = REJECT, }, @@ -8813,9 +9519,39 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { + "check subtraction on pointers for unpriv", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_ARG2, 0, 9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_FP), + BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_ARG2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1, 9 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R9 pointer -= pointer prohibited", + }, + { "bounds check based on zero-extended MOV", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), @@ -9146,6 +9882,36 @@ static struct bpf_test tests[] = { .result = REJECT }, { + "bounds check after 32-bit right shift with 64-bit input", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + /* r1 = 2 */ + BPF_MOV64_IMM(BPF_REG_1, 2), + /* r1 = 1<<32 */ + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 31), + /* r1 = 0 (NOT 2!) */ + BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 31), + /* r1 = 0xffff'fffe (NOT 0!) */ + BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 2), + /* computes OOB pointer */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "R0 invalid mem access", + .result = REJECT, + }, + { "bounds check map access with off+size signed 32bit overflow. test1", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), @@ -9185,6 +9951,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "pointer offset 1073741822", + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .result = REJECT }, { @@ -9206,6 +9973,7 @@ static struct bpf_test tests[] = { }, .fixup_map_hash_8b = { 3 }, .errstr = "pointer offset -1073741822", + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .result = REJECT }, { @@ -9377,6 +10145,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN() }, .errstr = "fp pointer offset 1073741822", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", .result = REJECT }, { @@ -13719,6 +14488,328 @@ static struct bpf_test tests[] = { .insn_processed = 15, }, { + "masking, test out of bounds 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 5), + BPF_MOV32_IMM(BPF_REG_2, 5 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 3", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0xffffffff), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 4", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0xffffffff), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 5", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 6", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_MOV32_IMM(BPF_REG_2, 5 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 9", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0xffffffff), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 10", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0xffffffff), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 11", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 12", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test in bounds 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 4), + BPF_MOV32_IMM(BPF_REG_2, 5 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 4, + }, + { + "masking, test in bounds 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test in bounds 3", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0xfffffffe), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0xfffffffe, + }, + { + "masking, test in bounds 4", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0xabcde), + BPF_MOV32_IMM(BPF_REG_2, 0xabcdef - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0xabcde, + }, + { + "masking, test in bounds 5", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test in bounds 6", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 46), + BPF_MOV32_IMM(BPF_REG_2, 47 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 46, + }, + { + "masking, test in bounds 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, -46), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1), + BPF_MOV32_IMM(BPF_REG_2, 47 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 46, + }, + { + "masking, test in bounds 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, -47), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1), + BPF_MOV32_IMM(BPF_REG_2, 47 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { "reference tracking in call: free reference in subprog and outside", .insns = { BPF_SK_LOOKUP, @@ -14413,6 +15504,16 @@ static int create_map(uint32_t type, uint32_t size_key, return fd; } +static void update_map(int fd, int index) +{ + struct test_val value = { + .index = (6 + 1) * sizeof(int), + .foo[6] = 0xabcdef12, + }; + + assert(!bpf_map_update_elem(fd, &index, &value, 0)); +} + static int create_prog_dummy1(enum bpf_prog_type prog_type) { struct bpf_insn prog[] = { @@ -14564,6 +15665,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, if (*fixup_map_array_48b) { map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(struct test_val), 1); + update_map(map_fds[3], 0); do { prog[*fixup_map_array_48b].imm = map_fds[3]; fixup_map_array_48b++;