whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit e8746440bf68212f19688f1454dad593c74abee1
parent fe76fc6aaf538df27708ffa3e5d549a6c8e16142
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Wed, 16 Jan 2019 05:13:36 +1200

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) Fix regression in multi-SKB responses to RTM_GETADDR, from Arthur
    Gautier.

 2) Fix ipv6 frag parsing in openvswitch, from Yi-Hung Wei.

 3) Unbounded recursion in ipv4 and ipv6 GUE tunnels, from Stefano
    Brivio.

 4) Use after free in hns driver, from Yonglong Liu.

 5) icmp6_send() needs to handle the case of NULL skb, from Eric
    Dumazet.

 6) Missing rcu read lock in __inet6_bind() when operating on mapped
    addresses, from David Ahern.

 7) Memory leak in tipc-nl_compat_publ_dump(), from Gustavo A. R. Silva.

 8) Fix PHY vs r8169 module loading ordering issues, from Heiner
    Kallweit.

 9) Fix bridge vlan memory leak, from Ido Schimmel.

10) Dev refcount leak in AF_PACKET, from Jason Gunthorpe.

11) Infoleak in ipv6_local_error(), flow label isn't completely
    initialized. From Eric Dumazet.

12) Handle mv88e6390 errata, from Andrew Lunn.

13) Making vhost/vsock CID hashing consistent, from Zha Bin.

14) Fix lack of UMH cleanup when it unexpectedly exits, from Taehee Yoo.

15) Bridge forwarding must clear skb->tstamp, from Paolo Abeni.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (87 commits)
  bnxt_en: Fix context memory allocation.
  bnxt_en: Fix ring checking logic on 57500 chips.
  mISDN: hfcsusb: Use struct_size() in kzalloc()
  net: clear skb->tstamp in bridge forwarding path
  net: bpfilter: disallow to remove bpfilter module while being used
  net: bpfilter: restart bpfilter_umh when error occurred
  net: bpfilter: use cleanup callback to release umh_info
  umh: add exit routine for UMH process
  isdn: i4l: isdn_tty: Fix some concurrency double-free bugs
  vhost/vsock: fix vhost vsock cid hashing inconsistent
  net: stmmac: Prevent RX starvation in stmmac_napi_poll()
  net: stmmac: Fix the logic of checking if RX Watchdog must be enabled
  net: stmmac: Check if CBS is supported before configuring
  net: stmmac: dwxgmac2: Only clear interrupts that are active
  net: stmmac: Fix PCI module removal leak
  tools/bpf: fix bpftool map dump with bitfields
  tools/bpf: test btf bitfield with >=256 struct member offset
  bpf: fix bpffs bitfield pretty print
  net: ethernet: mediatek: fix warning in phy_start_aneg
  tcp: change txhash on SYN-data timeout
  ...

Diffstat:
MDocumentation/bpf/bpf_design_QA.rst | 11+++++------
Mdrivers/isdn/hardware/mISDN/hfcsusb.c | 3+--
Mdrivers/isdn/i4l/isdn_tty.c | 6+++++-
Mdrivers/net/bonding/bond_main.c | 3+++
Mdrivers/net/dsa/microchip/ksz_common.c | 2--
Mdrivers/net/dsa/mt7530.c | 1-
Mdrivers/net/dsa/mv88e6xxx/chip.c | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdrivers/net/dsa/mv88e6xxx/chip.h | 5+++++
Mdrivers/net/dsa/mv88e6xxx/port.h | 10++++++++++
Mdrivers/net/ethernet/broadcom/bnxt/bnxt.c | 12++++++++----
Mdrivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 5+++--
Mdrivers/net/ethernet/cadence/macb_main.c | 8++------
Mdrivers/net/ethernet/chelsio/cxgb3/sge.c | 12+++++++-----
Mdrivers/net/ethernet/chelsio/cxgb3/t3_hw.c | 6+++---
Mdrivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c | 8++++----
Mdrivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 2+-
Mdrivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 2+-
Mdrivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 6++----
Mdrivers/net/ethernet/hisilicon/hns/hns_enet.c | 17+++++++----------
Mdrivers/net/ethernet/intel/Kconfig | 2+-
Mdrivers/net/ethernet/marvell/octeontx2/af/cgx.c | 4+++-
Mdrivers/net/ethernet/mediatek/mtk_eth_soc.c | 16----------------
Mdrivers/net/ethernet/mellanox/mlx4/icm.c | 101+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
Mdrivers/net/ethernet/mellanox/mlx4/icm.h | 22+++++++++++++++++++---
Mdrivers/net/ethernet/mellanox/mlxsw/Kconfig | 1+
Mdrivers/net/ethernet/mellanox/mlxsw/spectrum.c | 7+++++--
Mdrivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c | 10+++++++++-
Mdrivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c | 2--
Mdrivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c | 4++--
Mdrivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 23++++++++++-------------
Mdrivers/net/ethernet/microchip/lan743x_main.c | 11++++-------
Mdrivers/net/ethernet/qlogic/qed/qed_ll2.c | 4++++
Mdrivers/net/ethernet/realtek/r8169.c | 9+++++++--
Mdrivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 6+++---
Mdrivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 51++++++++++++++++++++++++++-------------------------
Mdrivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 10++++++++++
Mdrivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 2++
Mdrivers/net/tun.c | 11+++++++----
Mdrivers/net/usb/cdc_ether.c | 26+++++++++++---------------
Mdrivers/net/usb/qmi_wwan.c | 1+
Mdrivers/net/wan/fsl_ucc_hdlc.c | 62+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Mdrivers/phy/ti/Kconfig | 1+
Mdrivers/ptp/ptp_chardev.c | 3++-
Mdrivers/soc/fsl/qe/qe_tdm.c | 55-------------------------------------------------------
Mdrivers/vhost/vsock.c | 2+-
Minclude/linux/bpf_verifier.h | 1+
Minclude/linux/bpfilter.h | 15++++++++++++---
Minclude/linux/qed/qed_chain.h | 31+++++++++++++++++++++++++++++++
Minclude/linux/sched.h | 9+++++++++
Minclude/linux/umh.h | 2++
Minclude/uapi/linux/ptp_clock.h | 2+-
Mkernel/bpf/btf.c | 12++++++------
Mkernel/bpf/stackmap.c | 3++-
Mkernel/bpf/verifier.c | 61++++++++++++++++++++++++++++++++++++++++++++++++-------------
Mkernel/exit.c | 1+
Mkernel/umh.c | 33+++++++++++++++++++++++++++++++--
Mnet/bpfilter/bpfilter_kern.c | 76++++++++++++++++++++++++++++++++++++++++++----------------------------------
Mnet/bpfilter/bpfilter_umh_blob.S | 2+-
Mnet/bridge/br_forward.c | 1+
Mnet/bridge/br_netfilter_hooks.c | 2+-
Mnet/bridge/br_private.h | 1+
Mnet/bridge/br_vlan.c | 26+++++++++++++-------------
Mnet/can/gw.c | 30+++++++++++++++++++++++++++---
Mnet/core/filter.c | 2+-
Mnet/core/neighbour.c | 13+++++++++----
Mnet/core/skbuff.c | 7+------
Mnet/ipv4/bpfilter/sockopt.c | 58++++++++++++++++++++++++++++++++++++++++++++++++----------
Mnet/ipv4/devinet.c | 2+-
Mnet/ipv4/fou.c | 3++-
Mnet/ipv4/ip_sockglue.c | 12+++++-------
Mnet/ipv4/tcp_timer.c | 2+-
Mnet/ipv6/addrconf.c | 2+-
Mnet/ipv6/af_inet6.c | 14+++++++++++++-
Mnet/ipv6/datagram.c | 11+++++------
Mnet/ipv6/fou6.c | 8++++++++
Mnet/ipv6/icmp.c | 8++++++--
Mnet/ipv6/udp.c | 8++++----
Mnet/openvswitch/flow.c | 8+++++---
Mnet/packet/af_packet.c | 4++--
Mnet/rds/ib_send.c | 4++--
Mnet/rds/message.c | 4++--
Mnet/rds/rds.h | 4----
Mnet/rds/send.c | 2+-
Mnet/smc/af_smc.c | 4+++-
Mnet/tipc/netlink_compat.c | 4+++-
Msamples/bpf/test_cgrp2_attach2.c | 14+++++++-------
Msamples/bpf/test_current_task_under_cgroup_user.c | 2+-
Msamples/bpf/xdp1_user.c | 2+-
Mtools/bpf/bpftool/btf_dumper.c | 13+++++++------
Mtools/lib/bpf/.gitignore | 1+
Mtools/lib/bpf/README.rst | 14++++++++++++++
Mtools/testing/selftests/bpf/.gitignore | 1+
Mtools/testing/selftests/bpf/Makefile | 4+++-
Mtools/testing/selftests/bpf/cgroup_helpers.c | 6+++---
Mtools/testing/selftests/bpf/test_btf.c | 29+++++++++++++++++++++--------
Mtools/testing/selftests/bpf/test_cgroup_storage.c | 2+-
Mtools/testing/selftests/bpf/test_dev_cgroup.c | 2+-
Mtools/testing/selftests/bpf/test_netcnt.c | 2+-
Mtools/testing/selftests/bpf/test_skb_cgroup_id_user.c | 2+-
Mtools/testing/selftests/bpf/test_sock.c | 2+-
Mtools/testing/selftests/bpf/test_sock_addr.c | 55+++++++++++++++++++++++++++++++++++++++++++++++++++----
Mtools/testing/selftests/bpf/test_socket_cookie.c | 2+-
Mtools/testing/selftests/bpf/test_tcpbpf_user.c | 2+-
Mtools/testing/selftests/bpf/test_tcpnotify_user.c | 2+-
Mtools/testing/selftests/bpf/test_verifier.c | 120+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 18++++++++++++++++++
Mtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh | 15++++++++++++++-
Mtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh | 2+-
Mtools/testing/selftests/net/ip_defrag.c | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Mtools/testing/selftests/net/ip_defrag.sh | 9++++++++-
110 files changed, 1149 insertions(+), 438 deletions(-)

diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst @@ -157,12 +157,11 @@ Q: Does BPF have a stable ABI? ------------------------------ A: YES. BPF instructions, arguments to BPF programs, set of helper functions and their arguments, recognized return codes are all part -of ABI. However when tracing programs are using bpf_probe_read() helper -to walk kernel internal datastructures and compile with kernel -internal headers these accesses can and will break with newer -kernels. The union bpf_attr -> kern_version is checked at load time -to prevent accidentally loading kprobe-based bpf programs written -for a different kernel. Networking programs don't do kern_version check. +of ABI. However there is one specific exception to tracing programs +which are using helpers like bpf_probe_read() to walk kernel internal +data structures and compile with kernel internal headers. Both of these +kernel internals are subject to change and can break with newer kernels +such that the program needs to be adapted accordingly. Q: How much stack space a BPF program uses? ------------------------------------------- diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -262,8 +262,7 @@ hfcsusb_ph_info(struct hfcsusb *hw) struct dchannel *dch = &hw->dch; int i; - phi = kzalloc(sizeof(struct ph_info) + - dch->dev.nrbchan * sizeof(struct ph_info_ch), GFP_ATOMIC); + phi = kzalloc(struct_size(phi, bch, dch->dev.nrbchan), GFP_ATOMIC); phi->dch.ch.protocol = hw->protocol; phi->dch.ch.Flags = dch->Flags; phi->dch.state = dch->state; diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c @@ -1437,15 +1437,19 @@ isdn_tty_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { modem_info *info = (modem_info *) tty->driver_data; + mutex_lock(&modem_info_mutex); if (!old_termios) isdn_tty_change_speed(info); else { if (tty->termios.c_cflag == old_termios->c_cflag && tty->termios.c_ispeed == old_termios->c_ispeed && - tty->termios.c_ospeed == old_termios->c_ospeed) + tty->termios.c_ospeed == old_termios->c_ospeed) { + mutex_unlock(&modem_info_mutex); return; + } isdn_tty_change_speed(info); } + mutex_unlock(&modem_info_mutex); } /* diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c @@ -1963,6 +1963,9 @@ static int __bond_release_one(struct net_device *bond_dev, if (!bond_has_slaves(bond)) { bond_set_carrier(bond); eth_hw_addr_random(bond_dev); + bond->nest_level = SINGLE_DEPTH_NESTING; + } else { + bond->nest_level = dev_get_nest_level(bond_dev) + 1; } unblock_netpoll_tx(); diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c @@ -7,7 +7,6 @@ #include <linux/delay.h> #include <linux/export.h> -#include <linux/gpio.h> #include <linux/gpio/consumer.h> #include <linux/kernel.h> #include <linux/module.h> @@ -15,7 +14,6 @@ #include <linux/phy.h> #include <linux/etherdevice.h> #include <linux/if_bridge.h> -#include <linux/of_gpio.h> #include <linux/of_net.h> #include <net/dsa.h> #include <net/switchdev.h> diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c @@ -18,7 +18,6 @@ #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/netdevice.h> -#include <linux/of_gpio.h> #include <linux/of_mdio.h> #include <linux/of_net.h> #include <linux/of_platform.h> diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2403,6 +2403,107 @@ static int mv88e6xxx_stats_setup(struct mv88e6xxx_chip *chip) return mv88e6xxx_g1_stats_clear(chip); } +/* The mv88e6390 has some hidden registers used for debug and + * development. The errata also makes use of them. + */ +static int mv88e6390_hidden_write(struct mv88e6xxx_chip *chip, int port, + int reg, u16 val) +{ + u16 ctrl; + int err; + + err = mv88e6xxx_port_write(chip, PORT_RESERVED_1A_DATA_PORT, + PORT_RESERVED_1A, val); + if (err) + return err; + + ctrl = PORT_RESERVED_1A_BUSY | PORT_RESERVED_1A_WRITE | + PORT_RESERVED_1A_BLOCK | port << PORT_RESERVED_1A_PORT_SHIFT | + reg; + + return mv88e6xxx_port_write(chip, PORT_RESERVED_1A_CTRL_PORT, + PORT_RESERVED_1A, ctrl); +} + +static int mv88e6390_hidden_wait(struct mv88e6xxx_chip *chip) +{ + return mv88e6xxx_wait(chip, PORT_RESERVED_1A_CTRL_PORT, + PORT_RESERVED_1A, PORT_RESERVED_1A_BUSY); +} + + +static int mv88e6390_hidden_read(struct mv88e6xxx_chip *chip, int port, + int reg, u16 *val) +{ + u16 ctrl; + int err; + + ctrl = PORT_RESERVED_1A_BUSY | PORT_RESERVED_1A_READ | + PORT_RESERVED_1A_BLOCK | port << PORT_RESERVED_1A_PORT_SHIFT | + reg; + + err = mv88e6xxx_port_write(chip, PORT_RESERVED_1A_CTRL_PORT, + PORT_RESERVED_1A, ctrl); + if (err) + return err; + + err = mv88e6390_hidden_wait(chip); + if (err) + return err; + + return mv88e6xxx_port_read(chip, PORT_RESERVED_1A_DATA_PORT, + PORT_RESERVED_1A, val); +} + +/* Check if the errata has already been applied. */ +static bool mv88e6390_setup_errata_applied(struct mv88e6xxx_chip *chip) +{ + int port; + int err; + u16 val; + + for (port = 0; port < mv88e6xxx_num_ports(chip); port++) { + err = mv88e6390_hidden_read(chip, port, 0, &val); + if (err) { + dev_err(chip->dev, + "Error reading hidden register: %d\n", err); + return false; + } + if (val != 0x01c0) + return false; + } + + return true; +} + +/* The 6390 copper ports have an errata which require poking magic + * values into undocumented hidden registers and then performing a + * software reset. + */ +static int mv88e6390_setup_errata(struct mv88e6xxx_chip *chip) +{ + int port; + int err; + + if (mv88e6390_setup_errata_applied(chip)) + return 0; + + /* Set the ports into blocking mode */ + for (port = 0; port < mv88e6xxx_num_ports(chip); port++) { + err = mv88e6xxx_port_set_state(chip, port, BR_STATE_DISABLED); + if (err) + return err; + } + + for (port = 0; port < mv88e6xxx_num_ports(chip); port++) { + err = mv88e6390_hidden_write(chip, port, 0, 0x01c0); + if (err) + return err; + } + + return mv88e6xxx_software_reset(chip); +} + static int mv88e6xxx_setup(struct dsa_switch *ds) { struct mv88e6xxx_chip *chip = ds->priv; @@ -2415,6 +2516,12 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) mutex_lock(&chip->reg_lock); + if (chip->info->ops->setup_errata) { + err = chip->info->ops->setup_errata(chip); + if (err) + goto unlock; + } + /* Cache the cmode of each port. */ for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { if (chip->info->ops->port_get_cmode) { @@ -3226,6 +3333,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = { static const struct mv88e6xxx_ops mv88e6190_ops = { /* MV88E6XXX_FAMILY_6390 */ + .setup_errata = mv88e6390_setup_errata, .irl_init_all = mv88e6390_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, @@ -3269,6 +3377,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { static const struct mv88e6xxx_ops mv88e6190x_ops = { /* MV88E6XXX_FAMILY_6390 */ + .setup_errata = mv88e6390_setup_errata, .irl_init_all = mv88e6390_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, @@ -3312,6 +3421,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { static const struct mv88e6xxx_ops mv88e6191_ops = { /* MV88E6XXX_FAMILY_6390 */ + .setup_errata = mv88e6390_setup_errata, .irl_init_all = mv88e6390_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, @@ -3404,6 +3514,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { static const struct mv88e6xxx_ops mv88e6290_ops = { /* MV88E6XXX_FAMILY_6390 */ + .setup_errata = mv88e6390_setup_errata, .irl_init_all = mv88e6390_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, @@ -3709,6 +3820,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { static const struct mv88e6xxx_ops mv88e6390_ops = { /* MV88E6XXX_FAMILY_6390 */ + .setup_errata = mv88e6390_setup_errata, .irl_init_all = mv88e6390_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, @@ -3756,6 +3868,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { static const struct mv88e6xxx_ops mv88e6390x_ops = { /* MV88E6XXX_FAMILY_6390 */ + .setup_errata = mv88e6390_setup_errata, .irl_init_all = mv88e6390_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h @@ -300,6 +300,11 @@ struct mv88e6xxx_mdio_bus { }; struct mv88e6xxx_ops { + /* Switch Setup Errata, called early in the switch setup to + * allow any errata actions to be performed + */ + int (*setup_errata)(struct mv88e6xxx_chip *chip); + int (*ieee_pri_map)(struct mv88e6xxx_chip *chip); int (*ip_pri_map)(struct mv88e6xxx_chip *chip); diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h @@ -251,6 +251,16 @@ /* Offset 0x19: Port IEEE Priority Remapping Registers (4-7) */ #define MV88E6095_PORT_IEEE_PRIO_REMAP_4567 0x19 +/* Offset 0x1a: Magic undocumented errata register */ +#define PORT_RESERVED_1A 0x1a +#define PORT_RESERVED_1A_BUSY BIT(15) +#define PORT_RESERVED_1A_WRITE BIT(14) +#define PORT_RESERVED_1A_READ 0 +#define PORT_RESERVED_1A_PORT_SHIFT 5 +#define PORT_RESERVED_1A_BLOCK (0xf << 10) +#define PORT_RESERVED_1A_CTRL_PORT 4 +#define PORT_RESERVED_1A_DATA_PORT 5 + int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, u16 *val); int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5601,7 +5601,8 @@ static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, FUNC_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_VNIC_ASSETS_TEST; if (bp->flags & BNXT_FLAG_CHIP_P5) - flags |= FUNC_CFG_REQ_FLAGS_RSSCOS_CTX_ASSETS_TEST; + flags |= FUNC_CFG_REQ_FLAGS_RSSCOS_CTX_ASSETS_TEST | + FUNC_CFG_REQ_FLAGS_NQ_ASSETS_TEST; else flags |= FUNC_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST; } @@ -6221,9 +6222,12 @@ static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp, rmem->pg_tbl_map = ctx_pg->ctx_dma_arr[i]; rmem->depth = 1; rmem->nr_pages = MAX_CTX_PAGES; - if (i == (nr_tbls - 1)) - rmem->nr_pages = ctx_pg->nr_pages % - MAX_CTX_PAGES; + if (i == (nr_tbls - 1)) { + int rem = ctx_pg->nr_pages % MAX_CTX_PAGES; + + if (rem) + rmem->nr_pages = rem; + } rc = bnxt_alloc_ctx_mem_blk(bp, pg_tbl); if (rc) break; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -386,8 +386,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 0 -#define HWRM_VERSION_RSVD 33 -#define HWRM_VERSION_STR "1.10.0.33" +#define HWRM_VERSION_RSVD 35 +#define HWRM_VERSION_STR "1.10.0.35" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -1184,6 +1184,7 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_FLAGS_L2_CTX_ASSETS_TEST 0x100000UL #define FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE 0x200000UL #define FUNC_CFG_REQ_FLAGS_DYNAMIC_TX_RING_ALLOC 0x400000UL + #define FUNC_CFG_REQ_FLAGS_NQ_ASSETS_TEST 0x800000UL __le32 enables; #define FUNC_CFG_REQ_ENABLES_MTU 0x1UL #define FUNC_CFG_REQ_ENABLES_MRU 0x2UL diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c @@ -1738,12 +1738,8 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) *skb = nskb; } - if (padlen) { - if (padlen >= ETH_FCS_LEN) - skb_put_zero(*skb, padlen - ETH_FCS_LEN); - else - skb_trim(*skb, ETH_FCS_LEN - padlen); - } + if (padlen > ETH_FCS_LEN) + skb_put_zero(*skb, padlen - ETH_FCS_LEN); add_fcs: /* set FCS to packet */ diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -2381,7 +2381,7 @@ no_mem: lro_add_page(adap, qs, fl, G_RSPD_LEN(len), flags & F_RSPD_EOP); - goto next_fl; + goto next_fl; } skb = get_packet_pg(adap, fl, q, @@ -3214,11 +3214,13 @@ void t3_start_sge_timers(struct adapter *adap) for (i = 0; i < SGE_QSETS; ++i) { struct sge_qset *q = &adap->sge.qs[i]; - if (q->tx_reclaim_timer.function) - mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD); + if (q->tx_reclaim_timer.function) + mod_timer(&q->tx_reclaim_timer, + jiffies + TX_RECLAIM_PERIOD); - if (q->rx_reclaim_timer.function) - mod_timer(&q->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD); + if (q->rx_reclaim_timer.function) + mod_timer(&q->rx_reclaim_timer, + jiffies + RX_RECLAIM_PERIOD); } } diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -1082,7 +1082,7 @@ int t3_check_fw_version(struct adapter *adapter) CH_WARN(adapter, "found newer FW version(%u.%u), " "driver compiled for version %u.%u\n", major, minor, FW_VERSION_MAJOR, FW_VERSION_MINOR); - return 0; + return 0; } return -EINVAL; } @@ -3619,7 +3619,7 @@ int t3_reset_adapter(struct adapter *adapter) static int init_parity(struct adapter *adap) { - int i, err, addr; + int i, err, addr; if (t3_read_reg(adap, A_SG_CONTEXT_CMD) & F_CONTEXT_CMD_BUSY) return -EBUSY; @@ -3806,6 +3806,6 @@ int t3_replay_prep_adapter(struct adapter *adapter) p->phy.ops->power_down(&p->phy, 1); } -return 0; + return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c @@ -378,10 +378,10 @@ static void cxgb4_init_ptp_timer(struct adapter *adapter) int err; memset(&c, 0, sizeof(c)); - c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PTP_CMD) | - FW_CMD_REQUEST_F | - FW_CMD_WRITE_F | - FW_PTP_CMD_PORTID_V(0)); + c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PTP_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | + FW_PTP_CMD_PORTID_V(0)); c.retval_len16 = cpu_to_be32(FW_CMD_LEN16_V(sizeof(c) / 16)); c.u.scmd.sc = FW_PTP_SC_INIT_TIMER; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -78,7 +78,7 @@ static void free_msix_idx_in_bmap(struct adapter *adap, unsigned int msix_idx) unsigned long flags; spin_lock_irqsave(&bmap->lock, flags); - __clear_bit(msix_idx, bmap->msix_bmap); + __clear_bit(msix_idx, bmap->msix_bmap); spin_unlock_irqrestore(&bmap->lock, flags); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3794,7 +3794,7 @@ int t4_load_phy_fw(struct adapter *adap, /* If we have version number support, then check to see if the adapter * already has up-to-date PHY firmware loaded. */ - if (phy_fw_version) { + if (phy_fw_version) { new_phy_fw_vers = phy_fw_version(phy_fw_data, phy_fw_size); ret = t4_phy_fw_ver(adap, &cur_phy_fw_ver); if (ret < 0) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -147,12 +147,10 @@ static void hns_ae_put_handle(struct hnae_handle *handle) struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle); int i; - vf_cb->mac_cb = NULL; - - kfree(vf_cb); - for (i = 0; i < handle->q_num; i++) hns_ae_get_ring_pair(handle->qs[i])->used_by_vf = 0; + + kfree(vf_cb); } static int hns_ae_wait_flow_down(struct hnae_handle *handle) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1170,6 +1170,13 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) if (!h->phy_dev) return 0; + ethtool_convert_legacy_u32_to_link_mode(supported, h->if_support); + linkmode_and(phy_dev->supported, phy_dev->supported, supported); + linkmode_copy(phy_dev->advertising, phy_dev->supported); + + if (h->phy_if == PHY_INTERFACE_MODE_XGMII) + phy_dev->autoneg = false; + if (h->phy_if != PHY_INTERFACE_MODE_XGMII) { phy_dev->dev_flags = 0; @@ -1181,16 +1188,6 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) if (unlikely(ret)) return -ENODEV; - ethtool_convert_legacy_u32_to_link_mode(supported, h->if_support); - linkmode_and(phy_dev->supported, phy_dev->supported, supported); - linkmode_copy(phy_dev->advertising, phy_dev->supported); - - if (h->phy_if == PHY_INTERFACE_MODE_XGMII) - phy_dev->autoneg = false; - - if (h->phy_if == PHY_INTERFACE_MODE_SGMII) - phy_stop(phy_dev); - return 0; } diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig @@ -159,7 +159,7 @@ config IXGBE tristate "Intel(R) 10GbE PCI Express adapters support" depends on PCI select MDIO - select MDIO_DEVICE + select PHYLIB imply PTP_1588_CLOCK ---help--- This driver supports Intel(R) 10GbE PCI Express family of diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -825,7 +825,7 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!cgx->cgx_cmd_workq) { dev_err(dev, "alloc workqueue failed for cgx cmd"); err = -ENOMEM; - goto err_release_regions; + goto err_free_irq_vectors; } list_add(&cgx->cgx_list, &cgx_list); @@ -841,6 +841,8 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_release_lmac: cgx_lmac_exit(cgx); list_del(&cgx->cgx_list); +err_free_irq_vectors: + pci_free_irq_vectors(pdev); err_release_regions: pci_release_regions(pdev); err_disable_device: diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -258,11 +258,6 @@ static void mtk_phy_link_adjust(struct net_device *dev) mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); - if (dev->phydev->link) - netif_carrier_on(dev); - else - netif_carrier_off(dev); - if (!of_phy_is_fixed_link(mac->of_node)) phy_print_status(dev->phydev); } @@ -347,17 +342,6 @@ static int mtk_phy_connect(struct net_device *dev) if (mtk_phy_connect_node(eth, mac, np)) goto err_phy; - dev->phydev->autoneg = AUTONEG_ENABLE; - dev->phydev->speed = 0; - dev->phydev->duplex = 0; - - phy_set_max_speed(dev->phydev, SPEED_1000); - phy_support_asym_pause(dev->phydev); - linkmode_copy(dev->phydev->advertising, dev->phydev->supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - dev->phydev->advertising); - phy_start_aneg(dev->phydev); - of_node_put(np); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -57,12 +57,12 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu int i; if (chunk->nsg > 0) - pci_unmap_sg(dev->persist->pdev, chunk->mem, chunk->npages, - PCI_DMA_BIDIRECTIONAL); + dma_unmap_sg(&dev->persist->pdev->dev, chunk->sg, chunk->npages, + DMA_BIDIRECTIONAL); for (i = 0; i < chunk->npages; ++i) - __free_pages(sg_page(&chunk->mem[i]), - get_order(chunk->mem[i].length)); + __free_pages(sg_page(&chunk->sg[i]), + get_order(chunk->sg[i].length)); } static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk) @@ -71,9 +71,9 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk * for (i = 0; i < chunk->npages; ++i) dma_free_coherent(&dev->persist->pdev->dev, - chunk->mem[i].length, - lowmem_page_address(sg_page(&chunk->mem[i])), - sg_dma_address(&chunk->mem[i])); + chunk->buf[i].size, + chunk->buf[i].addr, + chunk->buf[i].dma_addr); } void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent) @@ -111,22 +111,21 @@ static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, return 0; } -static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, - int order, gfp_t gfp_mask) +static int mlx4_alloc_icm_coherent(struct device *dev, struct mlx4_icm_buf *buf, + int order, gfp_t gfp_mask) { - void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, - &sg_dma_address(mem), gfp_mask); - if (!buf) + buf->addr = dma_alloc_coherent(dev, PAGE_SIZE << order, + &buf->dma_addr, gfp_mask); + if (!buf->addr) return -ENOMEM; - if (offset_in_page(buf)) { - dma_free_coherent(dev, PAGE_SIZE << order, - buf, sg_dma_address(mem)); + if (offset_in_page(buf->addr)) { + dma_free_coherent(dev, PAGE_SIZE << order, buf->addr, + buf->dma_addr); return -ENOMEM; } - sg_set_buf(mem, buf, PAGE_SIZE << order); - sg_dma_len(mem) = PAGE_SIZE << order; + buf->size = PAGE_SIZE << order; return 0; } @@ -159,21 +158,21 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, while (npages > 0) { if (!chunk) { - chunk = kmalloc_node(sizeof(*chunk), + chunk = kzalloc_node(sizeof(*chunk), gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN), dev->numa_node); if (!chunk) { - chunk = kmalloc(sizeof(*chunk), + chunk = kzalloc(sizeof(*chunk), gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); if (!chunk) goto fail; } + chunk->coherent = coherent; - sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN); - chunk->npages = 0; - chunk->nsg = 0; + if (!coherent) + sg_init_table(chunk->sg, MLX4_ICM_CHUNK_LEN); list_add_tail(&chunk->list, &icm->chunk_list); } @@ -186,10 +185,10 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, if (coherent) ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev, - &chunk->mem[chunk->npages], - cur_order, mask); + &chunk->buf[chunk->npages], + cur_order, mask); else - ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages], + ret = mlx4_alloc_icm_pages(&chunk->sg[chunk->npages], cur_order, mask, dev->numa_node); @@ -205,9 +204,9 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, if (coherent) ++chunk->nsg; else if (chunk->npages == MLX4_ICM_CHUNK_LEN) { - chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, - chunk->npages, - PCI_DMA_BIDIRECTIONAL); + chunk->nsg = dma_map_sg(&dev->persist->pdev->dev, + chunk->sg, chunk->npages, + DMA_BIDIRECTIONAL); if (chunk->nsg <= 0) goto fail; @@ -220,9 +219,8 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, } if (!coherent && chunk) { - chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, - chunk->npages, - PCI_DMA_BIDIRECTIONAL); + chunk->nsg = dma_map_sg(&dev->persist->pdev->dev, chunk->sg, + chunk->npages, DMA_BIDIRECTIONAL); if (chunk->nsg <= 0) goto fail; @@ -320,7 +318,7 @@ void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, u64 idx; struct mlx4_icm_chunk *chunk; struct mlx4_icm *icm; - struct page *page = NULL; + void *addr = NULL; if (!table->lowmem) return NULL; @@ -336,28 +334,49 @@ void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, list_for_each_entry(chunk, &icm->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) { + dma_addr_t dma_addr; + size_t len; + + if (table->coherent) { + len = chunk->buf[i].size; + dma_addr = chunk->buf[i].dma_addr; + addr = chunk->buf[i].addr; + } else { + struct page *page; + + len = sg_dma_len(&chunk->sg[i]); + dma_addr = sg_dma_address(&chunk->sg[i]); + + /* XXX: we should never do this for highmem + * allocation. This function either needs + * to be split, or the kernel virtual address + * return needs to be made optional. + */ + page = sg_page(&chunk->sg[i]); + addr = lowmem_page_address(page); + } + if (dma_handle && dma_offset >= 0) { - if (sg_dma_len(&chunk->mem[i]) > dma_offset) - *dma_handle = sg_dma_address(&chunk->mem[i]) + - dma_offset; - dma_offset -= sg_dma_len(&chunk->mem[i]); + if (len > dma_offset) + *dma_handle = dma_addr + dma_offset; + dma_offset -= len; } + /* * DMA mapping can merge pages but not split them, * so if we found the page, dma_handle has already * been assigned to. */ - if (chunk->mem[i].length > offset) { - page = sg_page(&chunk->mem[i]); + if (len > offset) goto out; - } - offset -= chunk->mem[i].length; + offset -= len; } } + addr = NULL; out: mutex_unlock(&table->mutex); - return page ? lowmem_page_address(page) + offset : NULL; + return addr ? addr + offset : NULL; } int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -47,11 +47,21 @@ enum { MLX4_ICM_PAGE_SIZE = 1 << MLX4_ICM_PAGE_SHIFT, }; +struct mlx4_icm_buf { + void *addr; + size_t size; + dma_addr_t dma_addr; +}; + struct mlx4_icm_chunk { struct list_head list; int npages; int nsg; - struct scatterlist mem[MLX4_ICM_CHUNK_LEN]; + bool coherent; + union { + struct scatterlist sg[MLX4_ICM_CHUNK_LEN]; + struct mlx4_icm_buf buf[MLX4_ICM_CHUNK_LEN]; + }; }; struct mlx4_icm { @@ -114,12 +124,18 @@ static inline void mlx4_icm_next(struct mlx4_icm_iter *iter) static inline dma_addr_t mlx4_icm_addr(struct mlx4_icm_iter *iter) { - return sg_dma_address(&iter->chunk->mem[iter->page_idx]); + if (iter->chunk->coherent) + return iter->chunk->buf[iter->page_idx].dma_addr; + else + return sg_dma_address(&iter->chunk->sg[iter->page_idx]); } static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter) { - return sg_dma_len(&iter->chunk->mem[iter->page_idx]); + if (iter->chunk->coherent) + return iter->chunk->buf[iter->page_idx].size; + else + return sg_dma_len(&iter->chunk->sg[iter->page_idx]); } int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -78,6 +78,7 @@ config MLXSW_SPECTRUM depends on IPV6 || IPV6=n depends on NET_IPGRE || NET_IPGRE=n depends on IPV6_GRE || IPV6_GRE=n + depends on VXLAN || VXLAN=n select GENERIC_ALLOCATOR select PARMAN select OBJAGG diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -5005,12 +5005,15 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, lower_dev, upper_dev); } else if (netif_is_lag_master(upper_dev)) { - if (info->linking) + if (info->linking) { err = mlxsw_sp_port_lag_join(mlxsw_sp_port, upper_dev); - else + } else { + mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, + false); mlxsw_sp_port_lag_leave(mlxsw_sp_port, upper_dev); + } } else if (netif_is_ovs_master(upper_dev)) { if (info->linking) err = mlxsw_sp_port_ovs_join(mlxsw_sp_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c @@ -72,7 +72,15 @@ mlxsw_sp_acl_ctcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, act_set = mlxsw_afa_block_first_set(rulei->act_block); mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); + if (err) + goto err_ptce2_write; + + return 0; + +err_ptce2_write: + cregion->ops->entry_remove(cregion, centry); + return err; } static void diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -1022,7 +1022,6 @@ void mlxsw_sp_acl_erp_mask_put(struct mlxsw_sp_acl_atcam_region *aregion, { struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; - ASSERT_RTNL(); objagg_obj_put(aregion->erp_table->objagg, objagg_obj); } @@ -1054,7 +1053,6 @@ void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj); unsigned int erp_bank; - ASSERT_RTNL(); if (!mlxsw_sp_acl_erp_table_is_used(erp->erp_table)) return; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -816,14 +816,14 @@ int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, ops = nve->nve_ops_arr[params->type]; if (!ops->can_offload(nve, params->dev, extack)) - return -EOPNOTSUPP; + return -EINVAL; memset(&config, 0, sizeof(config)); ops->nve_config(nve, params->dev, &config); if (nve->num_nve_tunnels && memcmp(&config, &nve->config, sizeof(config))) { NL_SET_ERR_MSG_MOD(extack, "Conflicting NVE tunnels configuration"); - return -EOPNOTSUPP; + return -EINVAL; } err = mlxsw_sp_nve_tunnel_init(mlxsw_sp, &config); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1078,8 +1078,7 @@ static int mlxsw_sp_bridge_port_vlan_add(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_port *bridge_port, u16 vid, bool is_untagged, bool is_pvid, - struct netlink_ext_ack *extack, - struct switchdev_trans *trans) + struct netlink_ext_ack *extack) { u16 pvid = mlxsw_sp_port_pvid_determine(mlxsw_sp_port, vid, is_pvid); struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; @@ -1095,9 +1094,6 @@ mlxsw_sp_bridge_port_vlan_add(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port_vlan->bridge_port != bridge_port) return -EEXIST; - if (switchdev_trans_ph_prepare(trans)) - return 0; - if (!mlxsw_sp_port_vlan) { mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid); @@ -1188,6 +1184,9 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, return err; } + if (switchdev_trans_ph_commit(trans)) + return 0; + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); if (WARN_ON(!bridge_port)) return -EINVAL; @@ -1200,7 +1199,7 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, err = mlxsw_sp_bridge_port_vlan_add(mlxsw_sp_port, bridge_port, vid, flag_untagged, - flag_pvid, extack, trans); + flag_pvid, extack); if (err) return err; } @@ -1808,7 +1807,7 @@ static void mlxsw_sp_bridge_port_vlan_del(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_port *bridge_port, u16 vid) { - u16 pvid = mlxsw_sp_port->pvid == vid ? 0 : vid; + u16 pvid = mlxsw_sp_port->pvid == vid ? 0 : mlxsw_sp_port->pvid; struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); @@ -3207,7 +3206,6 @@ mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_bridge_device *bridge_device, const struct net_device *vxlan_dev, u16 vid, bool flag_untagged, bool flag_pvid, - struct switchdev_trans *trans, struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); @@ -3225,9 +3223,6 @@ mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_bridge_8021q_vxlan_dev_find(bridge_device->dev, vid)) return -EINVAL; - if (switchdev_trans_ph_prepare(trans)) - return 0; - if (!netif_running(vxlan_dev)) return 0; @@ -3345,6 +3340,9 @@ mlxsw_sp_switchdev_vxlan_vlans_add(struct net_device *vxlan_dev, port_obj_info->handled = true; + if (switchdev_trans_ph_commit(trans)) + return 0; + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); if (!bridge_device) return -EINVAL; @@ -3358,8 +3356,7 @@ mlxsw_sp_switchdev_vxlan_vlans_add(struct net_device *vxlan_dev, err = mlxsw_sp_switchdev_vxlan_vlan_add(mlxsw_sp, bridge_device, vxlan_dev, vid, flag_untagged, - flag_pvid, trans, - extack); + flag_pvid, extack); if (err) return err; } diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c @@ -962,13 +962,10 @@ static void lan743x_phy_link_status_change(struct net_device *netdev) memset(&ksettings, 0, sizeof(ksettings)); phy_ethtool_get_link_ksettings(netdev, &ksettings); - local_advertisement = phy_read(phydev, MII_ADVERTISE); - if (local_advertisement < 0) - return; - - remote_advertisement = phy_read(phydev, MII_LPA); - if (remote_advertisement < 0) - return; + local_advertisement = + linkmode_adv_to_mii_adv_t(phydev->advertising); + remote_advertisement = + linkmode_adv_to_mii_adv_t(phydev->lp_advertising); lan743x_phy_update_flowcontrol(adapter, ksettings.base.duplex, diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1619,6 +1619,10 @@ static void qed_ll2_post_rx_buffer_notify_fw(struct qed_hwfn *p_hwfn, cq_prod = qed_chain_get_prod_idx(&p_rx->rcq_chain); rx_prod.bd_prod = cpu_to_le16(bd_prod); rx_prod.cqe_prod = cpu_to_le16(cq_prod); + + /* Make sure chain element is updated before ringing the doorbell */ + dma_wmb(); + DIRECT_REG_WR(p_rx->set_prod_addr, *((u32 *)&rx_prod)); } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c @@ -205,6 +205,8 @@ enum cfg_version { }; static const struct pci_device_id rtl8169_pci_tbl[] = { + { PCI_VDEVICE(REALTEK, 0x2502), RTL_CFG_1 }, + { PCI_VDEVICE(REALTEK, 0x2600), RTL_CFG_1 }, { PCI_VDEVICE(REALTEK, 0x8129), RTL_CFG_0 }, { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_2 }, { PCI_VDEVICE(REALTEK, 0x8161), RTL_CFG_1 }, @@ -706,6 +708,7 @@ module_param(use_dac, int, 0); MODULE_PARM_DESC(use_dac, "Enable PCI DAC. Unsafe on 32 bit PCI slot."); module_param_named(debug, debug.msg_enable, int, 0); MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 16=all)"); +MODULE_SOFTDEP("pre: realtek"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FIRMWARE_8168D_1); MODULE_FIRMWARE(FIRMWARE_8168D_2); @@ -1679,11 +1682,13 @@ static bool rtl8169_reset_counters(struct rtl8169_private *tp) static bool rtl8169_update_counters(struct rtl8169_private *tp) { + u8 val = RTL_R8(tp, ChipCmd); + /* * Some chips are unable to dump tally counters when the receiver - * is disabled. + * is disabled. If 0xff chip may be in a PCI power-save state. */ - if ((RTL_R8(tp, ChipCmd) & CmdRxEnb) == 0) + if (!(val & CmdRxEnb) || val == 0xff) return true; return rtl8169_do_counters(tp, CounterDump); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -263,6 +263,7 @@ static int dwxgmac2_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x, u32 chan) { u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan)); + u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); int ret = 0; /* ABNORMAL interrupts */ @@ -282,8 +283,7 @@ static int dwxgmac2_dma_interrupt(void __iomem *ioaddr, x->normal_irq_n++; if (likely(intr_status & XGMAC_RI)) { - u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); - if (likely(value & XGMAC_RIE)) { + if (likely(intr_en & XGMAC_RIE)) { x->rx_normal_irq_n++; ret |= handle_rx; } @@ -295,7 +295,7 @@ static int dwxgmac2_dma_interrupt(void __iomem *ioaddr, } /* Clear interrupts */ - writel(~0x0, ioaddr + XGMAC_DMA_CH_STATUS(chan)); + writel(intr_en & intr_status, ioaddr + XGMAC_DMA_CH_STATUS(chan)); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3517,27 +3517,28 @@ static int stmmac_napi_poll(struct napi_struct *napi, int budget) struct stmmac_channel *ch = container_of(napi, struct stmmac_channel, napi); struct stmmac_priv *priv = ch->priv_data; - int work_done = 0, work_rem = budget; + int work_done, rx_done = 0, tx_done = 0; u32 chan = ch->index; priv->xstats.napi_poll++; - if (ch->has_tx) { - int done = stmmac_tx_clean(priv, work_rem, chan); + if (ch->has_tx) + tx_done = stmmac_tx_clean(priv, budget, chan); + if (ch->has_rx) + rx_done = stmmac_rx(priv, budget, chan); - work_done += done; - work_rem -= done; - } - - if (ch->has_rx) { - int done = stmmac_rx(priv, work_rem, chan); + work_done = max(rx_done, tx_done); + work_done = min(work_done, budget); - work_done += done; - work_rem -= done; - } + if (work_done < budget && napi_complete_done(napi, work_done)) { + int stat; - if (work_done < budget && napi_complete_done(napi, work_done)) stmmac_enable_dma_irq(priv, priv->ioaddr, chan); + stat = stmmac_dma_interrupt_status(priv, priv->ioaddr, + &priv->xstats, chan); + if (stat && napi_reschedule(napi)) + stmmac_disable_dma_irq(priv, priv->ioaddr, chan); + } return work_done; } @@ -4160,6 +4161,18 @@ static int stmmac_hw_init(struct stmmac_priv *priv) return ret; } + /* Rx Watchdog is available in the COREs newer than the 3.40. + * In some case, for example on bugged HW this feature + * has to be disable and this can be done by passing the + * riwt_off field from the platform. + */ + if (((priv->synopsys_id >= DWMAC_CORE_3_50) || + (priv->plat->has_xgmac)) && (!priv->plat->riwt_off)) { + priv->use_riwt = 1; + dev_info(priv->device, + "Enable RX Mitigation via HW Watchdog Timer\n"); + } + return 0; } @@ -4292,18 +4305,6 @@ int stmmac_dvr_probe(struct device *device, if (flow_ctrl) priv->flow_ctrl = FLOW_AUTO; /* RX/TX pause on */ - /* Rx Watchdog is available in the COREs newer than the 3.40. - * In some case, for example on bugged HW this feature - * has to be disable and this can be done by passing the - * riwt_off field from the platform. - */ - if (((priv->synopsys_id >= DWMAC_CORE_3_50) || - (priv->plat->has_xgmac)) && (!priv->plat->riwt_off)) { - priv->use_riwt = 1; - dev_info(priv->device, - "Enable RX Mitigation via HW Watchdog Timer\n"); - } - /* Setup channels NAPI */ maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -299,7 +299,17 @@ static int stmmac_pci_probe(struct pci_dev *pdev, */ static void stmmac_pci_remove(struct pci_dev *pdev) { + int i; + stmmac_dvr_remove(&pdev->dev); + + for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { + if (pci_resource_len(pdev, i) == 0) + continue; + pcim_iounmap_regions(pdev, BIT(i)); + break; + } + pci_disable_device(pdev); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -301,6 +301,8 @@ static int tc_setup_cbs(struct stmmac_priv *priv, /* Queue 0 is not AVB capable */ if (queue <= 0 || queue >= tx_queues_count) return -EINVAL; + if (!priv->dma_cap.av) + return -EOPNOTSUPP; if (priv->speed != SPEED_100 && priv->speed != SPEED_1000) return -EOPNOTSUPP; diff --git a/drivers/net/tun.c b/drivers/net/tun.c @@ -856,10 +856,6 @@ static int tun_attach(struct tun_struct *tun, struct file *file, err = 0; } - rcu_assign_pointer(tfile->tun, tun); - rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); - tun->numqueues++; - if (tfile->detached) { tun_enable_queue(tfile); } else { @@ -876,6 +872,13 @@ static int tun_attach(struct tun_struct *tun, struct file *file, * refcnt. */ + /* Publish tfile->tun and tun->tfiles only after we've fully + * initialized tfile; otherwise we risk using half-initialized + * object. + */ + rcu_assign_pointer(tfile->tun, tun); + rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); + tun->numqueues++; out: return err; } diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c @@ -179,10 +179,8 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) * probed with) and a slave/data interface; union * descriptors sort this all out. */ - info->control = usb_ifnum_to_if(dev->udev, - info->u->bMasterInterface0); - info->data = usb_ifnum_to_if(dev->udev, - info->u->bSlaveInterface0); + info->control = usb_ifnum_to_if(dev->udev, info->u->bMasterInterface0); + info->data = usb_ifnum_to_if(dev->udev, info->u->bSlaveInterface0); if (!info->control || !info->data) { dev_dbg(&intf->dev, "master #%u/%p slave #%u/%p\n", @@ -216,18 +214,16 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) /* a data interface altsetting does the real i/o */ d = &info->data->cur_altsetting->desc; if (d->bInterfaceClass != USB_CLASS_CDC_DATA) { - dev_dbg(&intf->dev, "slave class %u\n", - d->bInterfaceClass); + dev_dbg(&intf->dev, "slave class %u\n", d->bInterfaceClass); goto bad_desc; } skip: - if ( rndis && - header.usb_cdc_acm_descriptor && - header.usb_cdc_acm_descriptor->bmCapabilities) { - dev_dbg(&intf->dev, - "ACM capabilities %02x, not really RNDIS?\n", - header.usb_cdc_acm_descriptor->bmCapabilities); - goto bad_desc; + if (rndis && header.usb_cdc_acm_descriptor && + header.usb_cdc_acm_descriptor->bmCapabilities) { + dev_dbg(&intf->dev, + "ACM capabilities %02x, not really RNDIS?\n", + header.usb_cdc_acm_descriptor->bmCapabilities); + goto bad_desc; } if (header.usb_cdc_ether_desc && info->ether->wMaxSegmentSize) { @@ -238,7 +234,7 @@ skip: } if (header.usb_cdc_mdlm_desc && - memcmp(header.usb_cdc_mdlm_desc->bGUID, mbm_guid, 16)) { + memcmp(header.usb_cdc_mdlm_desc->bGUID, mbm_guid, 16)) { dev_dbg(&intf->dev, "GUID doesn't match\n"); goto bad_desc; } @@ -302,7 +298,7 @@ skip: if (info->control->cur_altsetting->desc.bNumEndpoints == 1) { struct usb_endpoint_descriptor *desc; - dev->status = &info->control->cur_altsetting->endpoint [0]; + dev->status = &info->control->cur_altsetting->endpoint[0]; desc = &dev->status->desc; if (!usb_endpoint_is_int_in(desc) || (le16_to_cpu(desc->wMaxPacketSize) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c @@ -123,6 +123,7 @@ static void qmimux_setup(struct net_device *dev) dev->addr_len = 0; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; dev->netdev_ops = &qmimux_netdev_ops; + dev->mtu = 1500; dev->needs_free_netdev = true; } diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c @@ -1056,6 +1056,54 @@ static const struct net_device_ops uhdlc_ops = { .ndo_tx_timeout = uhdlc_tx_timeout, }; +static int hdlc_map_iomem(char *name, int init_flag, void __iomem **ptr) +{ + struct device_node *np; + struct platform_device *pdev; + struct resource *res; + static int siram_init_flag; + int ret = 0; + + np = of_find_compatible_node(NULL, NULL, name); + if (!np) + return -EINVAL; + + pdev = of_find_device_by_node(np); + if (!pdev) { + pr_err("%pOFn: failed to lookup pdev\n", np); + of_node_put(np); + return -EINVAL; + } + + of_node_put(np); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + ret = -EINVAL; + goto error_put_device; + } + *ptr = ioremap(res->start, resource_size(res)); + if (!*ptr) { + ret = -ENOMEM; + goto error_put_device; + } + + /* We've remapped the addresses, and we don't need the device any + * more, so we should release it. + */ + put_device(&pdev->dev); + + if (init_flag && siram_init_flag == 0) { + memset_io(*ptr, 0, resource_size(res)); + siram_init_flag = 1; + } + return 0; + +error_put_device: + put_device(&pdev->dev); + + return ret; +} + static int ucc_hdlc_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -1150,6 +1198,15 @@ static int ucc_hdlc_probe(struct platform_device *pdev) ret = ucc_of_parse_tdm(np, utdm, ut_info); if (ret) goto free_utdm; + + ret = hdlc_map_iomem("fsl,t1040-qe-si", 0, + (void __iomem **)&utdm->si_regs); + if (ret) + goto free_utdm; + ret = hdlc_map_iomem("fsl,t1040-qe-siram", 1, + (void __iomem **)&utdm->siram); + if (ret) + goto unmap_si_regs; } if (of_property_read_u16(np, "fsl,hmask", &uhdlc_priv->hmask)) @@ -1158,7 +1215,7 @@ static int ucc_hdlc_probe(struct platform_device *pdev) ret = uhdlc_init(uhdlc_priv); if (ret) { dev_err(&pdev->dev, "Failed to init uhdlc\n"); - goto free_utdm; + goto undo_uhdlc_init; } dev = alloc_hdlcdev(uhdlc_priv); @@ -1187,6 +1244,9 @@ static int ucc_hdlc_probe(struct platform_device *pdev) free_dev: free_netdev(dev); undo_uhdlc_init: + iounmap(utdm->siram); +unmap_si_regs: + iounmap(utdm->si_regs); free_utdm: if (uhdlc_priv->tsa) kfree(utdm); diff --git a/drivers/phy/ti/Kconfig b/drivers/phy/ti/Kconfig @@ -82,6 +82,7 @@ config PHY_TI_GMII_SEL default y if TI_CPSW=y depends on TI_CPSW || COMPILE_TEST select GENERIC_PHY + select REGMAP default m help This driver supports configuring of the TI CPSW Port mode depending on diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c @@ -224,7 +224,8 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) extoff = NULL; break; } - if (extoff->n_samples > PTP_MAX_SAMPLES) { + if (extoff->n_samples > PTP_MAX_SAMPLES + || extoff->rsv[0] || extoff->rsv[1] || extoff->rsv[2]) { err = -EINVAL; break; } diff --git a/drivers/soc/fsl/qe/qe_tdm.c b/drivers/soc/fsl/qe/qe_tdm.c @@ -44,10 +44,6 @@ int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm *utdm, const char *sprop; int ret = 0; u32 val; - struct resource *res; - struct device_node *np2; - static int siram_init_flag; - struct platform_device *pdev; sprop = of_get_property(np, "fsl,rx-sync-clock", NULL); if (sprop) { @@ -124,57 +120,6 @@ int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm *utdm, utdm->siram_entry_id = val; set_si_param(utdm, ut_info); - - np2 = of_find_compatible_node(NULL, NULL, "fsl,t1040-qe-si"); - if (!np2) - return -EINVAL; - - pdev = of_find_device_by_node(np2); - if (!pdev) { - pr_err("%pOFn: failed to lookup pdev\n", np2); - of_node_put(np2); - return -EINVAL; - } - - of_node_put(np2); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - utdm->si_regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(utdm->si_regs)) { - ret = PTR_ERR(utdm->si_regs); - goto err_miss_siram_property; - } - - np2 = of_find_compatible_node(NULL, NULL, "fsl,t1040-qe-siram"); - if (!np2) { - ret = -EINVAL; - goto err_miss_siram_property; - } - - pdev = of_find_device_by_node(np2); - if (!pdev) { - ret = -EINVAL; - pr_err("%pOFn: failed to lookup pdev\n", np2); - of_node_put(np2); - goto err_miss_siram_property; - } - - of_node_put(np2); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - utdm->siram = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(utdm->siram)) { - ret = PTR_ERR(utdm->siram); - goto err_miss_siram_property; - } - - if (siram_init_flag == 0) { - memset_io(utdm->siram, 0, resource_size(res)); - siram_init_flag = 1; - } - - return ret; - -err_miss_siram_property: - devm_iounmap(&pdev->dev, utdm->si_regs); return ret; } EXPORT_SYMBOL(ucc_of_parse_tdm); diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c @@ -642,7 +642,7 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) hash_del_rcu(&vsock->hash); vsock->guest_cid = guest_cid; - hash_add_rcu(vhost_vsock_hash, &vsock->hash, guest_cid); + hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid); mutex_unlock(&vhost_vsock_mutex); return 0; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h @@ -172,6 +172,7 @@ struct bpf_verifier_state_list { #define BPF_ALU_SANITIZE_SRC 1U #define BPF_ALU_SANITIZE_DST 2U #define BPF_ALU_NEG_VALUE (1U << 2) +#define BPF_ALU_NON_POINTER (1U << 3) #define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ BPF_ALU_SANITIZE_DST) diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h @@ -3,13 +3,22 @@ #define _LINUX_BPFILTER_H #include <uapi/linux/bpfilter.h> +#include <linux/umh.h> struct sock; int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen); int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen); -extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname, - char __user *optval, - unsigned int optlen, bool is_set); +struct bpfilter_umh_ops { + struct umh_info info; + /* since ip_getsockopt() can run in parallel, serialize access to umh */ + struct mutex lock; + int (*sockopt)(struct sock *sk, int optname, + char __user *optval, + unsigned int optlen, bool is_set); + int (*start)(void); + bool stop; +}; +extern struct bpfilter_umh_ops bpfilter_ops; #endif diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h @@ -663,6 +663,37 @@ out: static inline void qed_chain_set_prod(struct qed_chain *p_chain, u32 prod_idx, void *p_prod_elem) { + if (p_chain->mode == QED_CHAIN_MODE_PBL) { + u32 cur_prod, page_mask, page_cnt, page_diff; + + cur_prod = is_chain_u16(p_chain) ? p_chain->u.chain16.prod_idx : + p_chain->u.chain32.prod_idx; + + /* Assume that number of elements in a page is power of 2 */ + page_mask = ~p_chain->elem_per_page_mask; + + /* Use "cur_prod - 1" and "prod_idx - 1" since producer index + * reaches the first element of next page before the page index + * is incremented. See qed_chain_produce(). + * Index wrap around is not a problem because the difference + * between current and given producer indices is always + * positive and lower than the chain's capacity. + */ + page_diff = (((cur_prod - 1) & page_mask) - + ((prod_idx - 1) & page_mask)) / + p_chain->elem_per_page; + + page_cnt = qed_chain_get_page_cnt(p_chain); + if (is_chain_u16(p_chain)) + p_chain->pbl.c.u16.prod_page_idx = + (p_chain->pbl.c.u16.prod_page_idx - + page_diff + page_cnt) % page_cnt; + else + p_chain->pbl.c.u32.prod_page_idx = + (p_chain->pbl.c.u32.prod_page_idx - + page_diff + page_cnt) % page_cnt; + } + if (is_chain_u16(p_chain)) p_chain->u.chain16.prod_idx = (u16) prod_idx; else diff --git a/include/linux/sched.h b/include/linux/sched.h @@ -1406,6 +1406,7 @@ extern struct pid *cad_pid; #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */ +#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ @@ -1904,6 +1905,14 @@ static inline void rseq_execve(struct task_struct *t) #endif +void __exit_umh(struct task_struct *tsk); + +static inline void exit_umh(struct task_struct *tsk) +{ + if (unlikely(tsk->flags & PF_UMH)) + __exit_umh(tsk); +} + #ifdef CONFIG_DEBUG_RSEQ void rseq_syscall(struct pt_regs *regs); diff --git a/include/linux/umh.h b/include/linux/umh.h @@ -47,6 +47,8 @@ struct umh_info { const char *cmdline; struct file *pipe_to_umh; struct file *pipe_from_umh; + struct list_head list; + void (*cleanup)(struct umh_info *info); pid_t pid; }; int fork_usermode_blob(void *data, size_t len, struct umh_info *info); diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h @@ -147,7 +147,7 @@ struct ptp_pin_desc { #define PTP_SYS_OFFSET_PRECISE \ _IOWR(PTP_CLK_MAGIC, 8, struct ptp_sys_offset_precise) #define PTP_SYS_OFFSET_EXTENDED \ - _IOW(PTP_CLK_MAGIC, 9, struct ptp_sys_offset_extended) + _IOWR(PTP_CLK_MAGIC, 9, struct ptp_sys_offset_extended) struct ptp_extts_event { struct ptp_clock_time t; /* Time event occured. */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c @@ -1219,8 +1219,6 @@ static void btf_bitfield_seq_show(void *data, u8 bits_offset, u8 nr_copy_bits; u64 print_num; - data += BITS_ROUNDDOWN_BYTES(bits_offset); - bits_offset = BITS_PER_BYTE_MASKED(bits_offset); nr_copy_bits = nr_bits + bits_offset; nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); @@ -1255,7 +1253,9 @@ static void btf_int_bits_seq_show(const struct btf *btf, * BTF_INT_OFFSET() cannot exceed 64 bits. */ total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); - btf_bitfield_seq_show(data, total_bits_offset, nr_bits, m); + data += BITS_ROUNDDOWN_BYTES(total_bits_offset); + bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset); + btf_bitfield_seq_show(data, bits_offset, nr_bits, m); } static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, @@ -2001,12 +2001,12 @@ static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, member_offset = btf_member_bit_offset(t, member); bitfield_size = btf_member_bitfield_size(t, member); + bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); + bits8_offset = BITS_PER_BYTE_MASKED(member_offset); if (bitfield_size) { - btf_bitfield_seq_show(data, member_offset, + btf_bitfield_seq_show(data + bytes_offset, bits8_offset, bitfield_size, m); } else { - bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); - bits8_offset = BITS_PER_BYTE_MASKED(member_offset); ops = btf_type_ops(member_type); ops->seq_show(btf, member_type, member->type, data + bytes_offset, bits8_offset, m); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c @@ -260,7 +260,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma, return -EFAULT; /* page not mapped */ ret = -EINVAL; - page_addr = page_address(page); + page_addr = kmap_atomic(page); ehdr = (Elf32_Ehdr *)page_addr; /* compare magic x7f "ELF" */ @@ -276,6 +276,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma, else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) ret = stack_map_get_build_id_64(page_addr, build_id); out: + kunmap_atomic(page_addr); put_page(page); return ret; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c @@ -3103,6 +3103,40 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, } } +static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, + const struct bpf_insn *insn) +{ + return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K; +} + +static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, + u32 alu_state, u32 alu_limit) +{ + /* If we arrived here from different branches with different + * state or limits to sanitize, then this won't work. + */ + if (aux->alu_state && + (aux->alu_state != alu_state || + aux->alu_limit != alu_limit)) + return -EACCES; + + /* Corresponding fixup done in fixup_bpf_calls(). */ + aux->alu_state = alu_state; + aux->alu_limit = alu_limit; + return 0; +} + +static int sanitize_val_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + struct bpf_insn_aux_data *aux = cur_aux(env); + + if (can_skip_alu_sanitation(env, insn)) + return 0; + + return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); +} + static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, @@ -3117,7 +3151,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_reg_state tmp; bool ret; - if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K) + if (can_skip_alu_sanitation(env, insn)) return 0; /* We already marked aux for masking from non-speculative @@ -3133,19 +3167,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) return 0; - - /* If we arrived here from different branches with different - * limits to sanitize, then this won't work. - */ - if (aux->alu_state && - (aux->alu_state != alu_state || - aux->alu_limit != alu_limit)) + if (update_alu_sanitation_state(aux, alu_state, alu_limit)) return -EACCES; - - /* Corresponding fixup done in fixup_bpf_calls(). */ - aux->alu_state = alu_state; - aux->alu_limit = alu_limit; - do_sim: /* Simulate and find potential out-of-bounds access under * speculative execution from truncation as a result of @@ -3418,6 +3441,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, s64 smin_val, smax_val; u64 umin_val, umax_val; u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; + u32 dst = insn->dst_reg; + int ret; if (insn_bitness == 32) { /* Relevant for 32-bit RSH: Information can propagate towards @@ -3452,6 +3477,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, switch (opcode) { case BPF_ADD: + ret = sanitize_val_alu(env, insn); + if (ret < 0) { + verbose(env, "R%d tried to add from different pointers or scalars\n", dst); + return ret; + } if (signed_add_overflows(dst_reg->smin_value, smin_val) || signed_add_overflows(dst_reg->smax_value, smax_val)) { dst_reg->smin_value = S64_MIN; @@ -3471,6 +3501,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); break; case BPF_SUB: + ret = sanitize_val_alu(env, insn); + if (ret < 0) { + verbose(env, "R%d tried to sub from different pointers or scalars\n", dst); + return ret; + } if (signed_sub_overflows(dst_reg->smin_value, smax_val) || signed_sub_overflows(dst_reg->smax_value, smin_val)) { /* Overflow possible, we know nothing */ diff --git a/kernel/exit.c b/kernel/exit.c @@ -866,6 +866,7 @@ void __noreturn do_exit(long code) exit_task_namespaces(tsk); exit_task_work(tsk); exit_thread(tsk); + exit_umh(tsk); /* * Flush inherited counters to the parent - before the parent diff --git a/kernel/umh.c b/kernel/umh.c @@ -37,6 +37,8 @@ static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; static DEFINE_SPINLOCK(umh_sysctl_lock); static DECLARE_RWSEM(umhelper_sem); +static LIST_HEAD(umh_list); +static DEFINE_MUTEX(umh_list_lock); static void call_usermodehelper_freeinfo(struct subprocess_info *info) { @@ -100,10 +102,12 @@ static int call_usermodehelper_exec_async(void *data) commit_creds(new); sub_info->pid = task_pid_nr(current); - if (sub_info->file) + if (sub_info->file) { retval = do_execve_file(sub_info->file, sub_info->argv, sub_info->envp); - else + if (!retval) + current->flags |= PF_UMH; + } else retval = do_execve(getname_kernel(sub_info->path), (const char __user *const __user *)sub_info->argv, (const char __user *const __user *)sub_info->envp); @@ -517,6 +521,11 @@ int fork_usermode_blob(void *data, size_t len, struct umh_info *info) goto out; err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); + if (!err) { + mutex_lock(&umh_list_lock); + list_add(&info->list, &umh_list); + mutex_unlock(&umh_list_lock); + } out: fput(file); return err; @@ -679,6 +688,26 @@ static int proc_cap_handler(struct ctl_table *table, int write, return 0; } +void __exit_umh(struct task_struct *tsk) +{ + struct umh_info *info; + pid_t pid = tsk->pid; + + mutex_lock(&umh_list_lock); + list_for_each_entry(info, &umh_list, list) { + if (info->pid == pid) { + list_del(&info->list); + mutex_unlock(&umh_list_lock); + goto out; + } + } + mutex_unlock(&umh_list_lock); + return; +out: + if (info->cleanup) + info->cleanup(info); +} + struct ctl_table usermodehelper_table[] = { { .procname = "bset", diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c @@ -13,39 +13,24 @@ extern char bpfilter_umh_start; extern char bpfilter_umh_end; -static struct umh_info info; -/* since ip_getsockopt() can run in parallel, serialize access to umh */ -static DEFINE_MUTEX(bpfilter_lock); - -static void shutdown_umh(struct umh_info *info) +static void shutdown_umh(void) { struct task_struct *tsk; - if (!info->pid) + if (bpfilter_ops.stop) return; - tsk = get_pid_task(find_vpid(info->pid), PIDTYPE_PID); + + tsk = get_pid_task(find_vpid(bpfilter_ops.info.pid), PIDTYPE_PID); if (tsk) { force_sig(SIGKILL, tsk); put_task_struct(tsk); } - fput(info->pipe_to_umh); - fput(info->pipe_from_umh); - info->pid = 0; } static void __stop_umh(void) { - if (IS_ENABLED(CONFIG_INET)) { - bpfilter_process_sockopt = NULL; - shutdown_umh(&info); - } -} - -static void stop_umh(void) -{ - mutex_lock(&bpfilter_lock); - __stop_umh(); - mutex_unlock(&bpfilter_lock); + if (IS_ENABLED(CONFIG_INET)) + shutdown_umh(); } static int __bpfilter_process_sockopt(struct sock *sk, int optname, @@ -63,10 +48,10 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.cmd = optname; req.addr = (long __force __user)optval; req.len = optlen; - mutex_lock(&bpfilter_lock); - if (!info.pid) + if (!bpfilter_ops.info.pid) goto out; - n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos); + n = __kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), + &pos); if (n != sizeof(req)) { pr_err("write fail %zd\n", n); __stop_umh(); @@ -74,7 +59,8 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, goto out; } pos = 0; - n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos); + n = kernel_read(bpfilter_ops.info.pipe_from_umh, &reply, sizeof(reply), + &pos); if (n != sizeof(reply)) { pr_err("read fail %zd\n", n); __stop_umh(); @@ -83,37 +69,59 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, } ret = reply.status; out: - mutex_unlock(&bpfilter_lock); return ret; } -static int __init load_umh(void) +static int start_umh(void) { int err; /* fork usermode process */ - info.cmdline = "bpfilter_umh"; err = fork_usermode_blob(&bpfilter_umh_start, &bpfilter_umh_end - &bpfilter_umh_start, - &info); + &bpfilter_ops.info); if (err) return err; - pr_info("Loaded bpfilter_umh pid %d\n", info.pid); + bpfilter_ops.stop = false; + pr_info("Loaded bpfilter_umh pid %d\n", bpfilter_ops.info.pid); /* health check that usermode process started correctly */ if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) { - stop_umh(); + shutdown_umh(); return -EFAULT; } - if (IS_ENABLED(CONFIG_INET)) - bpfilter_process_sockopt = &__bpfilter_process_sockopt; return 0; } +static int __init load_umh(void) +{ + int err; + + mutex_lock(&bpfilter_ops.lock); + if (!bpfilter_ops.stop) { + err = -EFAULT; + goto out; + } + err = start_umh(); + if (!err && IS_ENABLED(CONFIG_INET)) { + bpfilter_ops.sockopt = &__bpfilter_process_sockopt; + bpfilter_ops.start = &start_umh; + } +out: + mutex_unlock(&bpfilter_ops.lock); + return err; +} + static void __exit fini_umh(void) { - stop_umh(); + mutex_lock(&bpfilter_ops.lock); + if (IS_ENABLED(CONFIG_INET)) { + shutdown_umh(); + bpfilter_ops.start = NULL; + bpfilter_ops.sockopt = NULL; + } + mutex_unlock(&bpfilter_ops.lock); } module_init(load_umh); module_exit(fini_umh); diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ - .section .init.rodata, "a" + .section .bpfilter_umh, "a" .global bpfilter_umh_start bpfilter_umh_start: .incbin "net/bpfilter/bpfilter_umh" diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c @@ -65,6 +65,7 @@ EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + skb->tstamp = 0; return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, net, sk, skb, NULL, skb->dev, br_dev_queue_push_xmit); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c @@ -265,7 +265,7 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; - if (neigh->hh.hh_len) { + if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; ret = br_handle_frame_finish(net, sk, skb); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h @@ -107,6 +107,7 @@ struct br_tunnel_info { /* private vlan flags */ enum { BR_VLFLAG_PER_PORT_STATS = BIT(0), + BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1), }; /** diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c @@ -80,16 +80,18 @@ static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) } static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, - u16 vid, u16 flags, struct netlink_ext_ack *extack) + struct net_bridge_vlan *v, u16 flags, + struct netlink_ext_ack *extack) { int err; /* Try switchdev op first. In case it is not supported, fallback to * 8021q add. */ - err = br_switchdev_port_vlan_add(dev, vid, flags, extack); + err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack); if (err == -EOPNOTSUPP) - return vlan_vid_add(dev, br->vlan_proto, vid); + return vlan_vid_add(dev, br->vlan_proto, v->vid); + v->priv_flags |= BR_VLFLAG_ADDED_BY_SWITCHDEV; return err; } @@ -121,19 +123,17 @@ static void __vlan_del_list(struct net_bridge_vlan *v) } static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, - u16 vid) + const struct net_bridge_vlan *v) { int err; /* Try switchdev op first. In case it is not supported, fallback to * 8021q del. */ - err = br_switchdev_port_vlan_del(dev, vid); - if (err == -EOPNOTSUPP) { - vlan_vid_del(dev, br->vlan_proto, vid); - return 0; - } - return err; + err = br_switchdev_port_vlan_del(dev, v->vid); + if (!(v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)) + vlan_vid_del(dev, br->vlan_proto, v->vid); + return err == -EOPNOTSUPP ? 0 : err; } /* Returns a master vlan, if it didn't exist it gets created. In all cases a @@ -242,7 +242,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, * This ensures tagged traffic enters the bridge when * promiscuous mode is disabled by br_manage_promisc(). */ - err = __vlan_vid_add(dev, br, v->vid, flags, extack); + err = __vlan_vid_add(dev, br, v, flags, extack); if (err) goto out; @@ -305,7 +305,7 @@ out_fdb_insert: out_filt: if (p) { - __vlan_vid_del(dev, br, v->vid); + __vlan_vid_del(dev, br, v); if (masterv) { if (v->stats && masterv->stats != v->stats) free_percpu(v->stats); @@ -338,7 +338,7 @@ static int __vlan_del(struct net_bridge_vlan *v) __vlan_delete_pvid(vg, v->vid); if (p) { - err = __vlan_vid_del(p->dev, p->br, v->vid); + err = __vlan_vid_del(p->dev, p->br, v); if (err) goto out; } else { diff --git a/net/can/gw.c b/net/can/gw.c @@ -416,13 +416,29 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); - /* check for checksum updates when the CAN frame has been modified */ + /* Has the CAN frame been modified? */ if (modidx) { - if (gwj->mod.csumfunc.crc8) + /* get available space for the processed CAN frame type */ + int max_len = nskb->len - offsetof(struct can_frame, data); + + /* dlc may have changed, make sure it fits to the CAN frame */ + if (cf->can_dlc > max_len) + goto out_delete; + + /* check for checksum updates in classic CAN length only */ + if (gwj->mod.csumfunc.crc8) { + if (cf->can_dlc > 8) + goto out_delete; + (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + } + + if (gwj->mod.csumfunc.xor) { + if (cf->can_dlc > 8) + goto out_delete; - if (gwj->mod.csumfunc.xor) (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + } } /* clear the skb timestamp if not configured the other way */ @@ -434,6 +450,14 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) gwj->dropped_frames++; else gwj->handled_frames++; + + return; + + out_delete: + /* delete frame due to misconfiguration */ + gwj->deleted_frames++; + kfree_skb(nskb); + return; } static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj) diff --git a/net/core/filter.c b/net/core/filter.c @@ -4203,7 +4203,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, /* Only some options are supported */ switch (optname) { case TCP_BPF_IW: - if (val <= 0 || tp->data_segs_out > 0) + if (val <= 0 || tp->data_segs_out > tp->syn_data) ret = -EINVAL; else tp->snd_cwnd = val; diff --git a/net/core/neighbour.c b/net/core/neighbour.c @@ -18,6 +18,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> +#include <linux/kmemleak.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/module.h> @@ -443,12 +444,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) return NULL; - if (size <= PAGE_SIZE) + if (size <= PAGE_SIZE) { buckets = kzalloc(size, GFP_ATOMIC); - else + } else { buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); + kmemleak_alloc(buckets, size, 0, GFP_ATOMIC); + } if (!buckets) { kfree(ret); return NULL; @@ -468,10 +471,12 @@ static void neigh_hash_free_rcu(struct rcu_head *head) size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); struct neighbour __rcu **buckets = nht->hash_buckets; - if (size <= PAGE_SIZE) + if (size <= PAGE_SIZE) { kfree(buckets); - else + } else { + kmemleak_free(buckets); free_pages((unsigned long)buckets, get_order(size)); + } kfree(nht); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c @@ -5270,7 +5270,6 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, unsigned long chunk; struct sk_buff *skb; struct page *page; - gfp_t gfp_head; int i; *errcode = -EMSGSIZE; @@ -5280,12 +5279,8 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, if (npages > MAX_SKB_FRAGS) return NULL; - gfp_head = gfp_mask; - if (gfp_head & __GFP_DIRECT_RECLAIM) - gfp_head |= __GFP_RETRY_MAYFAIL; - *errcode = -ENOBUFS; - skb = alloc_skb(header_len, gfp_head); + skb = alloc_skb(header_len, gfp_mask); if (!skb) return NULL; diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c @@ -1,28 +1,54 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/init.h> +#include <linux/module.h> #include <linux/uaccess.h> #include <linux/bpfilter.h> #include <uapi/linux/bpf.h> #include <linux/wait.h> #include <linux/kmod.h> +#include <linux/fs.h> +#include <linux/file.h> -int (*bpfilter_process_sockopt)(struct sock *sk, int optname, - char __user *optval, - unsigned int optlen, bool is_set); -EXPORT_SYMBOL_GPL(bpfilter_process_sockopt); +struct bpfilter_umh_ops bpfilter_ops; +EXPORT_SYMBOL_GPL(bpfilter_ops); + +static void bpfilter_umh_cleanup(struct umh_info *info) +{ + mutex_lock(&bpfilter_ops.lock); + bpfilter_ops.stop = true; + fput(info->pipe_to_umh); + fput(info->pipe_from_umh); + info->pid = 0; + mutex_unlock(&bpfilter_ops.lock); +} static int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval, unsigned int optlen, bool is_set) { - if (!bpfilter_process_sockopt) { - int err = request_module("bpfilter"); + int err; + mutex_lock(&bpfilter_ops.lock); + if (!bpfilter_ops.sockopt) { + mutex_unlock(&bpfilter_ops.lock); + err = request_module("bpfilter"); + mutex_lock(&bpfilter_ops.lock); if (err) - return err; - if (!bpfilter_process_sockopt) - return -ECHILD; + goto out; + if (!bpfilter_ops.sockopt) { + err = -ECHILD; + goto out; + } + } + if (bpfilter_ops.stop) { + err = bpfilter_ops.start(); + if (err) + goto out; } - return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set); + err = bpfilter_ops.sockopt(sk, optname, optval, optlen, is_set); +out: + mutex_unlock(&bpfilter_ops.lock); + return err; } int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, @@ -41,3 +67,15 @@ int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, return bpfilter_mbox_request(sk, optname, optval, len, false); } + +static int __init bpfilter_sockopt_init(void) +{ + mutex_init(&bpfilter_ops.lock); + bpfilter_ops.stop = true; + bpfilter_ops.info.cmdline = "bpfilter_umh"; + bpfilter_ops.info.cleanup = &bpfilter_umh_cleanup; + + return 0; +} + +module_init(bpfilter_sockopt_init); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c @@ -1826,7 +1826,7 @@ put_tgt_net: if (fillargs.netnsid >= 0) put_net(tgt_net); - return err < 0 ? err : skb->len; + return skb->len ? : err; } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c @@ -1065,7 +1065,8 @@ static int gue_err(struct sk_buff *skb, u32 info) * recursion. Besides, this kind of encapsulation can't even be * configured currently. Discard this. */ - if (guehdr->proto_ctype == IPPROTO_UDP) + if (guehdr->proto_ctype == IPPROTO_UDP || + guehdr->proto_ctype == IPPROTO_UDPLITE) return -EOPNOTSUPP; skb_set_transport_header(skb, -(int)sizeof(struct icmphdr)); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c @@ -148,19 +148,17 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { + __be16 _ports[2], *ports; struct sockaddr_in sin; - __be16 *ports; - int end; - - end = skb_transport_offset(skb) + 4; - if (end > 0 && !pskb_may_pull(skb, end)) - return; /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ - ports = (__be16 *)skb_transport_header(skb); + ports = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_ports), &_ports); + if (!ports) + return; sin.sin_family = AF_INET; sin.sin_addr.s_addr = ip_hdr(skb)->daddr; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c @@ -226,7 +226,7 @@ static int tcp_write_timeout(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) { dst_negative_advice(sk); - } else if (!tp->syn_data && !tp->syn_fastopen) { + } else { sk_rethink_txhash(sk); } retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c @@ -5154,7 +5154,7 @@ put_tgt_net: if (fillargs.netnsid >= 0) put_net(tgt_net); - return err < 0 ? err : skb->len; + return skb->len ? : err; } static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c @@ -310,6 +310,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, /* Check if the address belongs to the host. */ if (addr_type == IPV6_ADDR_MAPPED) { + struct net_device *dev = NULL; int chk_addr_ret; /* Binding to v4-mapped address on a v6-only socket @@ -320,9 +321,20 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, goto out; } + rcu_read_lock(); + if (sk->sk_bound_dev_if) { + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + if (!dev) { + err = -ENODEV; + goto out_unlock; + } + } + /* Reproduce AF_INET checks to make the bindings consistent */ v4addr = addr->sin6_addr.s6_addr32[3]; - chk_addr_ret = inet_addr_type(net, v4addr); + chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr); + rcu_read_unlock(); + if (!inet_can_nonlocal_bind(net, inet) && v4addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c @@ -341,6 +341,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) skb_reset_network_header(skb); iph = ipv6_hdr(skb); iph->daddr = fl6->daddr; + ip6_flow_hdr(iph, 0, 0); serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; @@ -700,17 +701,15 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.rxorigdstaddr) { struct sockaddr_in6 sin6; - __be16 *ports; - int end; + __be16 _ports[2], *ports; - end = skb_transport_offset(skb) + 4; - if (end <= 0 || pskb_may_pull(skb, end)) { + ports = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_ports), &_ports); + if (ports) { /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ - ports = (__be16 *)skb_transport_header(skb); - sin6.sin6_family = AF_INET6; sin6.sin6_addr = ipv6_hdr(skb)->daddr; sin6.sin6_port = ports[1]; diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c @@ -131,6 +131,14 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (validate_gue_flags(guehdr, optlen)) return -EINVAL; + /* Handling exceptions for direct UDP encapsulation in GUE would lead to + * recursion. Besides, this kind of encapsulation can't even be + * configured currently. Discard this. + */ + if (guehdr->proto_ctype == IPPROTO_UDP || + guehdr->proto_ctype == IPPROTO_UDPLITE) + return -EOPNOTSUPP; + skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr)); ret = gue6_err_proto_handler(guehdr->proto_ctype, skb, opt, type, code, offset, info); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c @@ -423,10 +423,10 @@ static int icmp6_iif(const struct sk_buff *skb) static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr) { - struct net *net = dev_net(skb->dev); struct inet6_dev *idev = NULL; struct ipv6hdr *hdr = ipv6_hdr(skb); struct sock *sk; + struct net *net; struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; struct dst_entry *dst; @@ -437,12 +437,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, int iif = 0; int addr_type = 0; int len; - u32 mark = IP6_REPLY_MARK(net, skb->mark); + u32 mark; if ((u8 *)hdr < skb->head || (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) return; + if (!skb->dev) + return; + net = dev_net(skb->dev); + mark = IP6_REPLY_MARK(net, skb->mark); /* * Make sure we respect the rules * i.e. RFC 1885 2.4(e) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c @@ -1390,10 +1390,7 @@ do_udp_sendmsg: ipc6.opt = opt; fl6.flowi6_proto = sk->sk_protocol; - if (!ipv6_addr_any(daddr)) - fl6.daddr = *daddr; - else - fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + fl6.daddr = *daddr; if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) fl6.saddr = np->saddr; fl6.fl6_sport = inet->inet_sport; @@ -1421,6 +1418,9 @@ do_udp_sendmsg: } } + if (ipv6_addr_any(&fl6.daddr)) + fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + final_p = fl6_update_dst(&fl6, opt, &final); if (final_p) connected = false; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c @@ -276,10 +276,12 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags); if (flags & IP6_FH_F_FRAG) { - if (frag_off) + if (frag_off) { key->ip.frag = OVS_FRAG_TYPE_LATER; - else - key->ip.frag = OVS_FRAG_TYPE_FIRST; + key->ip.proto = nexthdr; + return 0; + } + key->ip.frag = OVS_FRAG_TYPE_FIRST; } else { key->ip.frag = OVS_FRAG_TYPE_NONE; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c @@ -2628,7 +2628,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out; + goto out_put; } err = -ENXIO; @@ -2828,7 +2828,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out; + goto out_unlock; } err = -ENXIO; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c @@ -522,7 +522,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) i = 1; else - i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE); + i = DIV_ROUND_UP(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE); work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); if (work_alloc == 0) { @@ -879,7 +879,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) * Instead of knowing how to return a partial rdma read/write we insist that there * be enough work requests to send the entire message. */ - i = ceil(op->op_count, max_sge); + i = DIV_ROUND_UP(op->op_count, max_sge); work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); if (work_alloc != i) { diff --git a/net/rds/message.c b/net/rds/message.c @@ -341,7 +341,7 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in { struct rds_message *rm; unsigned int i; - int num_sgs = ceil(total_len, PAGE_SIZE); + int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE); int extra_bytes = num_sgs * sizeof(struct scatterlist); int ret; @@ -351,7 +351,7 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); - rm->data.op_nents = ceil(total_len, PAGE_SIZE); + rm->data.op_nents = DIV_ROUND_UP(total_len, PAGE_SIZE); rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret); if (!rm->data.op_sg) { rds_message_put(rm); diff --git a/net/rds/rds.h b/net/rds/rds.h @@ -48,10 +48,6 @@ void rdsdebug(char *fmt, ...) } #endif -/* XXX is there one of these somewhere? */ -#define ceil(x, y) \ - ({ unsigned long __x = (x), __y = (y); (__x + __y - 1) / __y; }) - #define RDS_FRAG_SHIFT 12 #define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) diff --git a/net/rds/send.c b/net/rds/send.c @@ -1107,7 +1107,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) size_t total_payload_len = payload_len, rdma_payload_len = 0; bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) && sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY)); - int num_sgs = ceil(payload_len, PAGE_SIZE); + int num_sgs = DIV_ROUND_UP(payload_len, PAGE_SIZE); int namelen; struct rds_iov_vector_arr vct; int ind; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c @@ -146,6 +146,9 @@ static int smc_release(struct socket *sock) sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; } + + sk->sk_prot->unhash(sk); + if (smc->clcsock) { if (smc->use_fallback && sk->sk_state == SMC_LISTEN) { /* wake up clcsock accept */ @@ -170,7 +173,6 @@ static int smc_release(struct socket *sock) smc_conn_free(&smc->conn); release_sock(sk); - sk->sk_prot->unhash(sk); sock_put(sk); /* final sock_put */ out: return rc; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c @@ -904,8 +904,10 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); - if (!hdr) + if (!hdr) { + kfree_skb(args); return -EMSGSIZE; + } nest = nla_nest_start(args, TIPC_NLA_SOCK); if (!nest) { diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c @@ -77,7 +77,7 @@ static int test_foo_bar(void) /* Create cgroup /foo, get fd, and join it */ foo = create_and_get_cgroup(FOO); - if (!foo) + if (foo < 0) goto err; if (join_cgroup(FOO)) @@ -94,7 +94,7 @@ static int test_foo_bar(void) /* Create cgroup /foo/bar, get fd, and join it */ bar = create_and_get_cgroup(BAR); - if (!bar) + if (bar < 0) goto err; if (join_cgroup(BAR)) @@ -298,19 +298,19 @@ static int test_multiprog(void) goto err; cg1 = create_and_get_cgroup("/cg1"); - if (!cg1) + if (cg1 < 0) goto err; cg2 = create_and_get_cgroup("/cg1/cg2"); - if (!cg2) + if (cg2 < 0) goto err; cg3 = create_and_get_cgroup("/cg1/cg2/cg3"); - if (!cg3) + if (cg3 < 0) goto err; cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4"); - if (!cg4) + if (cg4 < 0) goto err; cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5"); - if (!cg5) + if (cg5 < 0) goto err; if (join_cgroup("/cg1/cg2/cg3/cg4/cg5")) diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c @@ -32,7 +32,7 @@ int main(int argc, char **argv) cg2 = create_and_get_cgroup(CGROUP_PATH); - if (!cg2) + if (cg2 < 0) goto err; if (bpf_map_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) { diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c @@ -103,7 +103,7 @@ int main(int argc, char **argv) return 1; } - ifindex = if_nametoindex(argv[1]); + ifindex = if_nametoindex(argv[optind]); if (!ifindex) { perror("if_nametoindex"); return 1; diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c @@ -82,8 +82,6 @@ static void btf_dumper_bitfield(__u32 nr_bits, __u8 bit_offset, int bits_to_copy; __u64 print_num; - data += BITS_ROUNDDOWN_BYTES(bit_offset); - bit_offset = BITS_PER_BYTE_MASKED(bit_offset); bits_to_copy = bit_offset + nr_bits; bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy); @@ -118,7 +116,9 @@ static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset, * BTF_INT_OFFSET() cannot exceed 64 bits. */ total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type); - btf_dumper_bitfield(nr_bits, total_bits_offset, data, jw, + data += BITS_ROUNDDOWN_BYTES(total_bits_offset); + bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset); + btf_dumper_bitfield(nr_bits, bit_offset, data, jw, is_plain_text); } @@ -216,11 +216,12 @@ static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id, } jsonw_name(d->jw, btf__name_by_offset(d->btf, m[i].name_off)); + data_off = data + BITS_ROUNDDOWN_BYTES(bit_offset); if (bitfield_size) { - btf_dumper_bitfield(bitfield_size, bit_offset, - data, d->jw, d->is_plain_text); + btf_dumper_bitfield(bitfield_size, + BITS_PER_BYTE_MASKED(bit_offset), + data_off, d->jw, d->is_plain_text); } else { - data_off = data + BITS_ROUNDDOWN_BYTES(bit_offset); ret = btf_dumper_do_type(d, m[i].type, BITS_PER_BYTE_MASKED(bit_offset), data_off); diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore @@ -1,2 +1,3 @@ libbpf_version.h FEATURE-DUMP.libbpf +test_libbpf diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst @@ -132,6 +132,20 @@ For example, if current state of ``libbpf.map`` is: Format of version script and ways to handle ABI changes, including incompatible ones, described in details in [1]. +Stand-alone build +================= + +Under https://github.com/libbpf/libbpf there is a (semi-)automated +mirror of the mainline's version of libbpf for a stand-alone build. + +However, all changes to libbpf's code base must be upstreamed through +the mainline kernel tree. + +License +======= + +libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause. + Links ===== diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore @@ -28,3 +28,4 @@ flow_dissector_load test_netcnt test_section_names test_tcpnotify_user +test_libbpf diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile @@ -55,7 +55,9 @@ TEST_PROGS := test_kmod.sh \ test_flow_dissector.sh \ test_xdp_vlan.sh -TEST_PROGS_EXTENDED := with_addr.sh +TEST_PROGS_EXTENDED := with_addr.sh \ + tcp_client.py \ + tcp_server.py # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -155,7 +155,7 @@ void cleanup_cgroup_environment(void) * This function creates a cgroup under the top level workdir and returns the * file descriptor. It is idempotent. * - * On success, it returns the file descriptor. On failure it returns 0. + * On success, it returns the file descriptor. On failure it returns -1. * If there is a failure, it prints the error to stderr. */ int create_and_get_cgroup(const char *path) @@ -166,13 +166,13 @@ int create_and_get_cgroup(const char *path) format_cgroup_path(cgroup_path, path); if (mkdir(cgroup_path, 0777) && errno != EEXIST) { log_err("mkdiring cgroup %s .. %s", path, cgroup_path); - return 0; + return -1; } fd = open(cgroup_path, O_RDONLY); if (fd < 0) { log_err("Opening Cgroup"); - return 0; + return -1; } return fd; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c @@ -3526,6 +3526,8 @@ struct pprint_mapv { ENUM_TWO, ENUM_THREE, } aenum; + uint32_t ui32b; + uint32_t bits2c:2; }; static struct btf_raw_test pprint_test_template[] = { @@ -3568,7 +3570,7 @@ static struct btf_raw_test pprint_test_template[] = { BTF_ENUM_ENC(NAME_TBD, 2), BTF_ENUM_ENC(NAME_TBD, 3), /* struct pprint_mapv */ /* [16] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 32), + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 10), 40), BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */ BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */ BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */ @@ -3577,9 +3579,11 @@ static struct btf_raw_test pprint_test_template[] = { BTF_MEMBER_ENC(NAME_TBD, 6, 126), /* unused_bits2b */ BTF_MEMBER_ENC(0, 14, 128), /* union (anon) */ BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */ + BTF_MEMBER_ENC(NAME_TBD, 11, 224), /* uint32_t ui32b */ + BTF_MEMBER_ENC(NAME_TBD, 6, 256), /* bits2c */ BTF_END_RAW, }, - BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"), + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c"), .key_size = sizeof(unsigned int), .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ @@ -3628,7 +3632,7 @@ static struct btf_raw_test pprint_test_template[] = { BTF_ENUM_ENC(NAME_TBD, 2), BTF_ENUM_ENC(NAME_TBD, 3), /* struct pprint_mapv */ /* [16] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 8), 32), + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 10), 40), BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ @@ -3637,9 +3641,11 @@ static struct btf_raw_test pprint_test_template[] = { BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 126)), /* unused_bits2b */ BTF_MEMBER_ENC(0, 14, BTF_MEMBER_OFFSET(0, 128)), /* union (anon) */ BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */ + BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */ BTF_END_RAW, }, - BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"), + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c"), .key_size = sizeof(unsigned int), .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ @@ -3690,7 +3696,7 @@ static struct btf_raw_test pprint_test_template[] = { BTF_ENUM_ENC(NAME_TBD, 2), BTF_ENUM_ENC(NAME_TBD, 3), /* struct pprint_mapv */ /* [16] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 8), 32), + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 10), 40), BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ @@ -3699,13 +3705,15 @@ static struct btf_raw_test pprint_test_template[] = { BTF_MEMBER_ENC(NAME_TBD, 19, BTF_MEMBER_OFFSET(2, 126)),/* unused_bits2b */ BTF_MEMBER_ENC(0, 14, BTF_MEMBER_OFFSET(0, 128)), /* union (anon) */ BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */ + BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */ /* typedef unsigned int ___int */ /* [17] */ BTF_TYPEDEF_ENC(NAME_TBD, 18), BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 6), /* [18] */ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 15), /* [19] */ BTF_END_RAW, }, - BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0___int"), + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0___int"), .key_size = sizeof(unsigned int), .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ @@ -3793,6 +3801,8 @@ static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i, v->unused_bits2b = 3; v->ui64 = i; v->aenum = i & 0x03; + v->ui32b = 4; + v->bits2c = 1; v = (void *)v + rounded_value_size; } } @@ -3955,7 +3965,8 @@ static int do_test_pprint(int test_num) nexpected_line = snprintf(expected_line, sizeof(expected_line), "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," - "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n", + "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," + "%u,0x%x}\n", percpu_map ? "\tcpu" : "", percpu_map ? cpu : next_key, cmapv->ui32, cmapv->si32, @@ -3967,7 +3978,9 @@ static int do_test_pprint(int test_num) cmapv->ui8a[2], cmapv->ui8a[3], cmapv->ui8a[4], cmapv->ui8a[5], cmapv->ui8a[6], cmapv->ui8a[7], - pprint_enum_str[cmapv->aenum]); + pprint_enum_str[cmapv->aenum], + cmapv->ui32b, + cmapv->bits2c); err = check_line(expected_line, nexpected_line, sizeof(expected_line), line); diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -81,7 +81,7 @@ int main(int argc, char **argv) /* Create a cgroup, get fd, and join it */ cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - if (!cgroup_fd) { + if (cgroup_fd < 0) { printf("Failed to create test cgroup\n"); goto err; } diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -43,7 +43,7 @@ int main(int argc, char **argv) /* Create a cgroup, get fd, and join it */ cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - if (!cgroup_fd) { + if (cgroup_fd < 0) { printf("Failed to create test cgroup\n"); goto err; } diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c @@ -65,7 +65,7 @@ int main(int argc, char **argv) /* Create a cgroup, get fd, and join it */ cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - if (!cgroup_fd) { + if (cgroup_fd < 0) { printf("Failed to create test cgroup\n"); goto err; } diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c @@ -164,7 +164,7 @@ int main(int argc, char **argv) goto err; cgfd = create_and_get_cgroup(CGROUP_PATH); - if (!cgfd) + if (cgfd < 0) goto err; if (join_cgroup(CGROUP_PATH)) diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c @@ -458,7 +458,7 @@ int main(int argc, char **argv) goto err; cgfd = create_and_get_cgroup(CG_PATH); - if (!cgfd) + if (cgfd < 0) goto err; if (join_cgroup(CG_PATH)) diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c @@ -44,6 +44,7 @@ #define SERV6_V4MAPPED_IP "::ffff:192.168.0.4" #define SRC6_IP "::1" #define SRC6_REWRITE_IP "::6" +#define WILDCARD6_IP "::" #define SERV6_PORT 6060 #define SERV6_REWRITE_PORT 6666 @@ -85,12 +86,14 @@ static int bind4_prog_load(const struct sock_addr_test *test); static int bind6_prog_load(const struct sock_addr_test *test); static int connect4_prog_load(const struct sock_addr_test *test); static int connect6_prog_load(const struct sock_addr_test *test); +static int sendmsg_allow_prog_load(const struct sock_addr_test *test); static int sendmsg_deny_prog_load(const struct sock_addr_test *test); static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test); static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test); +static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test); static struct sock_addr_test tests[] = { /* bind */ @@ -463,6 +466,34 @@ static struct sock_addr_test tests[] = { SYSCALL_ENOTSUPP, }, { + "sendmsg6: set dst IP = [::] (BSD'ism)", + sendmsg6_rw_wildcard_prog_load, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + "sendmsg6: preserve dst IP = [::] (BSD'ism)", + sendmsg_allow_prog_load, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + AF_INET6, + SOCK_DGRAM, + WILDCARD6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_PORT, + SRC6_IP, + SUCCESS, + }, + { "sendmsg6: deny call", sendmsg_deny_prog_load, BPF_CGROUP_UDP6_SENDMSG, @@ -734,16 +765,27 @@ static int connect6_prog_load(const struct sock_addr_test *test) return load_path(test, CONNECT6_PROG_PATH); } -static int sendmsg_deny_prog_load(const struct sock_addr_test *test) +static int sendmsg_ret_only_prog_load(const struct sock_addr_test *test, + int32_t rc) { struct bpf_insn insns[] = { - /* return 0 */ - BPF_MOV64_IMM(BPF_REG_0, 0), + /* return rc */ + BPF_MOV64_IMM(BPF_REG_0, rc), BPF_EXIT_INSN(), }; return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); } +static int sendmsg_allow_prog_load(const struct sock_addr_test *test) +{ + return sendmsg_ret_only_prog_load(test, /*rc*/ 1); +} + +static int sendmsg_deny_prog_load(const struct sock_addr_test *test) +{ + return sendmsg_ret_only_prog_load(test, /*rc*/ 0); +} + static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test) { struct sockaddr_in dst4_rw_addr; @@ -864,6 +906,11 @@ static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test) return sendmsg6_rw_dst_asm_prog_load(test, SERV6_V4MAPPED_IP); } +static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test) +{ + return sendmsg6_rw_dst_asm_prog_load(test, WILDCARD6_IP); +} + static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test) { return load_path(test, SENDMSG6_PROG_PATH); @@ -1395,7 +1442,7 @@ int main(int argc, char **argv) goto err; cgfd = create_and_get_cgroup(CG_PATH); - if (!cgfd) + if (cgfd < 0) goto err; if (join_cgroup(CG_PATH)) diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -202,7 +202,7 @@ int main(int argc, char **argv) goto err; cgfd = create_and_get_cgroup(CG_PATH); - if (!cgfd) + if (cgfd < 0) goto err; if (join_cgroup(CG_PATH)) diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -103,7 +103,7 @@ int main(int argc, char **argv) goto err; cg_fd = create_and_get_cgroup(cg_path); - if (!cg_fd) + if (cg_fd < 0) goto err; if (join_cgroup(cg_path)) diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -115,7 +115,7 @@ int main(int argc, char **argv) goto err; cg_fd = create_and_get_cgroup(cg_path); - if (!cg_fd) + if (cg_fd < 0) goto err; if (join_cgroup(cg_path)) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c @@ -6934,6 +6934,126 @@ static struct bpf_test tests[] = { .retval = 1, }, { + "map access: mixing value pointer and scalar, 1", + .insns = { + // load map value pointer into r0 and r2 + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + // load some number from the map into r1 + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + // depending on r1, branch: + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 3), + // branch A + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_A(2), + // branch B + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0x100000), + // common instruction + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + // depending on r1, branch: + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + // branch A + BPF_JMP_A(4), + // branch B + BPF_MOV64_IMM(BPF_REG_0, 0x13371337), + // verifier follows fall-through + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0x100000, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + // fake-dead code; targeted from branch A to + // prevent dead code sanitization + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R2 tried to add from different pointers or scalars", + .retval = 0, + }, + { + "map access: mixing value pointer and scalar, 2", + .insns = { + // load map value pointer into r0 and r2 + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + // load some number from the map into r1 + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + // depending on r1, branch: + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + // branch A + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0x100000), + BPF_JMP_A(2), + // branch B + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 0), + // common instruction + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + // depending on r1, branch: + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + // branch A + BPF_JMP_A(4), + // branch B + BPF_MOV64_IMM(BPF_REG_0, 0x13371337), + // verifier follows fall-through + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0x100000, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + // fake-dead code; targeted from branch A to + // prevent dead code sanitization + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R2 tried to add from different maps or paths", + .retval = 0, + }, + { + "sanitation: alu with different scalars", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0x100000), + BPF_JMP_A(2), + BPF_MOV64_IMM(BPF_REG_2, 42), + BPF_MOV64_IMM(BPF_REG_3, 0x100001), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .retval = 0x100000, + }, + { "map access: value_ptr += known scalar, upper oob arith, test 1", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -847,6 +847,24 @@ sanitization_vlan_aware_test() log_test "vlan-aware - failed enslavement to vlan-aware bridge" + bridge vlan del vid 10 dev vxlan20 + bridge vlan add vid 20 dev vxlan20 pvid untagged + + # Test that offloading of an unsupported tunnel fails when it is + # triggered by addition of VLAN to a local port + RET=0 + + # TOS must be set to inherit + ip link set dev vxlan10 type vxlan tos 42 + + ip link set dev $swp1 master br0 + bridge vlan add vid 10 dev $swp1 &> /dev/null + check_fail $? + + log_test "vlan-aware - failed vlan addition to a local port" + + ip link set dev vxlan10 type vxlan tos inherit + ip link del dev vxlan20 ip link del dev vxlan10 ip link del dev br0 diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -96,6 +96,19 @@ flooding() flood_test $swp2 $h1 $h2 } +vlan_deletion() +{ + # Test that the deletion of a VLAN on a bridge port does not affect + # the PVID VLAN + log_info "Add and delete a VLAN on bridge port $swp1" + + bridge vlan add vid 10 dev $swp1 + bridge vlan del vid 10 dev $swp1 + + ping_ipv4 + ping_ipv6 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -629,7 +629,7 @@ __test_ecn_decap() RET=0 tc filter add dev $h1 ingress pref 77 prot ip \ - flower ip_tos $decapped_tos action pass + flower ip_tos $decapped_tos action drop sleep 1 vxlan_encapped_ping_test v2 v1 192.0.2.17 \ $orig_inner_tos $orig_outer_tos \ diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c @@ -203,6 +203,7 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr, { struct ip *iphdr = (struct ip *)ip_frame; struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame; + const bool ipv4 = !ipv6; int res; int offset; int frag_len; @@ -239,19 +240,53 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr, iphdr->ip_sum = 0; } + /* Occasionally test in-order fragments. */ + if (!cfg_overlap && (rand() % 100 < 15)) { + offset = 0; + while (offset < (UDP_HLEN + payload_len)) { + send_fragment(fd_raw, addr, alen, offset, ipv6); + offset += max_frag_len; + } + return; + } + + /* Occasionally test IPv4 "runs" (see net/ipv4/ip_fragment.c) */ + if (ipv4 && !cfg_overlap && (rand() % 100 < 20) && + (payload_len > 9 * max_frag_len)) { + offset = 6 * max_frag_len; + while (offset < (UDP_HLEN + payload_len)) { + send_fragment(fd_raw, addr, alen, offset, ipv6); + offset += max_frag_len; + } + offset = 3 * max_frag_len; + while (offset < 6 * max_frag_len) { + send_fragment(fd_raw, addr, alen, offset, ipv6); + offset += max_frag_len; + } + offset = 0; + while (offset < 3 * max_frag_len) { + send_fragment(fd_raw, addr, alen, offset, ipv6); + offset += max_frag_len; + } + return; + } + /* Odd fragments. */ offset = max_frag_len; while (offset < (UDP_HLEN + payload_len)) { send_fragment(fd_raw, addr, alen, offset, ipv6); + /* IPv4 ignores duplicates, so randomly send a duplicate. */ + if (ipv4 && (1 == rand() % 100)) + send_fragment(fd_raw, addr, alen, offset, ipv6); offset += 2 * max_frag_len; } if (cfg_overlap) { /* Send an extra random fragment. */ - offset = rand() % (UDP_HLEN + payload_len - 1); - /* sendto() returns EINVAL if offset + frag_len is too small. */ if (ipv6) { struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN); + /* sendto() returns EINVAL if offset + frag_len is too small. */ + offset = rand() % (UDP_HLEN + payload_len - 1); frag_len = max_frag_len + rand() % 256; /* In IPv6 if !!(frag_len % 8), the fragment is dropped. */ frag_len &= ~0x7; @@ -259,13 +294,29 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr, ip6hdr->ip6_plen = htons(frag_len); frag_len += IP6_HLEN; } else { - frag_len = IP4_HLEN + UDP_HLEN + rand() % 256; + /* In IPv4, duplicates and some fragments completely inside + * previously sent fragments are dropped/ignored. So + * random offset and frag_len can result in a dropped + * fragment instead of a dropped queue/packet. So we + * hard-code offset and frag_len. + * + * See ade446403bfb ("net: ipv4: do not handle duplicate + * fragments as overlapping"). + */ + if (max_frag_len * 4 < payload_len || max_frag_len < 16) { + /* not enough payload to play with random offset and frag_len. */ + offset = 8; + frag_len = IP4_HLEN + UDP_HLEN + max_frag_len; + } else { + offset = rand() % (payload_len / 2); + frag_len = 2 * max_frag_len + 1 + rand() % 256; + } iphdr->ip_off = htons(offset / 8 | IP4_MF); iphdr->ip_len = htons(frag_len); } res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen); if (res < 0) - error(1, errno, "sendto overlap"); + error(1, errno, "sendto overlap: %d", frag_len); if (res != frag_len) error(1, 0, "sendto overlap: %d vs %d", (int)res, frag_len); frag_counter++; @@ -275,6 +326,9 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr, offset = 0; while (offset < (UDP_HLEN + payload_len)) { send_fragment(fd_raw, addr, alen, offset, ipv6); + /* IPv4 ignores duplicates, so randomly send a duplicate. */ + if (ipv4 && (1 == rand() % 100)) + send_fragment(fd_raw, addr, alen, offset, ipv6); offset += 2 * max_frag_len; } } @@ -282,7 +336,11 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr, static void run_test(struct sockaddr *addr, socklen_t alen, bool ipv6) { int fd_tx_raw, fd_rx_udp; - struct timeval tv = { .tv_sec = 0, .tv_usec = 10 * 1000 }; + /* Frag queue timeout is set to one second in the calling script; + * socket timeout should be just a bit longer to avoid tests interfering + * with each other. + */ + struct timeval tv = { .tv_sec = 1, .tv_usec = 10 }; int idx; int min_frag_len = ipv6 ? 1280 : 8; @@ -308,12 +366,32 @@ static void run_test(struct sockaddr *addr, socklen_t alen, bool ipv6) payload_len += (rand() % 4096)) { if (cfg_verbose) printf("payload_len: %d\n", payload_len); - max_frag_len = min_frag_len; - do { + + if (cfg_overlap) { + /* With overlaps, one send/receive pair below takes + * at least one second (== timeout) to run, so there + * is not enough test time to run a nested loop: + * the full overlap test takes 20-30 seconds. + */ + max_frag_len = min_frag_len + + rand() % (1500 - FRAG_HLEN - min_frag_len); send_udp_frags(fd_tx_raw, addr, alen, ipv6); recv_validate_udp(fd_rx_udp); - max_frag_len += 8 * (rand() % 8); - } while (max_frag_len < (1500 - FRAG_HLEN) && max_frag_len <= payload_len); + } else { + /* Without overlaps, each packet reassembly (== one + * send/receive pair below) takes very little time to + * run, so we can easily afford more thourough testing + * with a nested loop: the full non-overlap test takes + * less than one second). + */ + max_frag_len = min_frag_len; + do { + send_udp_frags(fd_tx_raw, addr, alen, ipv6); + recv_validate_udp(fd_rx_udp); + max_frag_len += 8 * (rand() % 8); + } while (max_frag_len < (1500 - FRAG_HLEN) && + max_frag_len <= payload_len); + } } /* Cleanup. */ diff --git a/tools/testing/selftests/net/ip_defrag.sh b/tools/testing/selftests/net/ip_defrag.sh @@ -11,10 +11,17 @@ readonly NETNS="ns-$(mktemp -u XXXXXX)" setup() { ip netns add "${NETNS}" ip -netns "${NETNS}" link set lo up + ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_high_thresh=9000000 >/dev/null 2>&1 ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_low_thresh=7000000 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_time=1 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_high_thresh=9000000 >/dev/null 2>&1 ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_low_thresh=7000000 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_time=1 >/dev/null 2>&1 + + # DST cache can get full with a lot of frags, with GC not keeping up with the test. + ip netns exec "${NETNS}" sysctl -w net.ipv6.route.max_size=65536 >/dev/null 2>&1 } cleanup() { @@ -27,7 +34,6 @@ setup echo "ipv4 defrag" ip netns exec "${NETNS}" ./ip_defrag -4 - echo "ipv4 defrag with overlaps" ip netns exec "${NETNS}" ./ip_defrag -4o @@ -37,3 +43,4 @@ ip netns exec "${NETNS}" ./ip_defrag -6 echo "ipv6 defrag with overlaps" ip netns exec "${NETNS}" ./ip_defrag -6o +echo "all tests done"