whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit d48f782e4fb20dc7ec935ca0ca41ae31e4a69362
parent 8586ca8a214471e4573d76356aabe890bfecdc8a
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Sun,  9 Dec 2018 15:12:33 -0800

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:
 "A decent batch of fixes here. I'd say about half are for problems that
  have existed for a while, and half are for new regressions added in
  the 4.20 merge window.

   1) Fix 10G SFP phy module detection in mvpp2, from Baruch Siach.

   2) Revert bogus emac driver change, from Benjamin Herrenschmidt.

   3) Handle BPF exported data structure with pointers when building
      32-bit userland, from Daniel Borkmann.

   4) Memory leak fix in act_police, from Davide Caratti.

   5) Check RX checksum offload in RX descriptors properly in aquantia
      driver, from Dmitry Bogdanov.

   6) SKB unlink fix in various spots, from Edward Cree.

   7) ndo_dflt_fdb_dump() only works with ethernet, enforce this, from
      Eric Dumazet.

   8) Fix FID leak in mlxsw driver, from Ido Schimmel.

   9) IOTLB locking fix in vhost, from Jean-Philippe Brucker.

  10) Fix SKB truesize accounting in ipv4/ipv6/netfilter frag memory
      limits otherwise namespace exit can hang. From Jiri Wiesner.

  11) Address block parsing length fixes in x25 from Martin Schiller.

  12) IRQ and ring accounting fixes in bnxt_en, from Michael Chan.

  13) For tun interfaces, only iface delete works with rtnl ops, enforce
      this by disallowing add. From Nicolas Dichtel.

  14) Use after free in liquidio, from Pan Bian.

  15) Fix SKB use after passing to netif_receive_skb(), from Prashant
      Bhole.

  16) Static key accounting and other fixes in XPS from Sabrina Dubroca.

  17) Partially initialized flow key passed to ip6_route_output(), from
      Shmulik Ladkani.

  18) Fix RTNL deadlock during reset in ibmvnic driver, from Thomas
      Falcon.

  19) Several small TCP fixes (off-by-one on window probe abort, NULL
      deref in tail loss probe, SNMP mis-estimations) from Yuchung
      Cheng"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (93 commits)
  net/sched: cls_flower: Reject duplicated rules also under skip_sw
  bnxt_en: Fix _bnxt_get_max_rings() for 57500 chips.
  bnxt_en: Fix NQ/CP rings accounting on the new 57500 chips.
  bnxt_en: Keep track of reserved IRQs.
  bnxt_en: Fix CNP CoS queue regression.
  net/mlx4_core: Correctly set PFC param if global pause is turned off.
  Revert "net/ibm/emac: wrong bit is used for STA control"
  neighbour: Avoid writing before skb->head in neigh_hh_output()
  ipv6: Check available headroom in ip6_xmit() even without options
  tcp: lack of available data can also cause TSO defer
  ipv6: sr: properly initialize flowi6 prior passing to ip6_route_output
  mlxsw: spectrum_switchdev: Fix VLAN device deletion via ioctl
  mlxsw: spectrum_router: Relax GRE decap matching check
  mlxsw: spectrum_switchdev: Avoid leaking FID's reference count
  mlxsw: spectrum_nve: Remove easily triggerable warnings
  ipv4: ipv6: netfilter: Adjust the frag mem limit when truesize changes
  sctp: frag_point sanity check
  tcp: fix NULL ref in tail loss probe
  tcp: Do not underestimate rwnd_limited
  net: use skb_list_del_init() to remove from RX sublists
  ...

Diffstat:
MDocumentation/ABI/testing/sysfs-class-net-dsa | 2+-
MMAINTAINERS | 7+++++++
March/powerpc/net/bpf_jit_comp64.c | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdrivers/net/bonding/bond_3ad.c | 3+++
Mdrivers/net/dsa/mv88e6060.c | 10+++-------
Mdrivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 2+-
Mdrivers/net/ethernet/broadcom/bnxt/bnxt.c | 60++++++++++++++++++++++++++++++++++++++++++++++++------------
Mdrivers/net/ethernet/broadcom/bnxt/bnxt.h | 1+
Mdrivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 2+-
Mdrivers/net/ethernet/cavium/liquidio/lio_ethtool.c | 2+-
Mdrivers/net/ethernet/cavium/liquidio/lio_vf_rep.c | 4+++-
Mdrivers/net/ethernet/freescale/fman/fman.c | 5++---
Mdrivers/net/ethernet/ibm/emac/emac.h | 2+-
Mdrivers/net/ethernet/ibm/ibmvnic.c | 2+-
Mdrivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 34+++++++++++++++++++++++++++++++++-
Mdrivers/net/ethernet/mellanox/mlx4/Kconfig | 2+-
Mdrivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 4++--
Mdrivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4++--
Mdrivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1-
Mdrivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 6+++---
Mdrivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c | 4++--
Mdrivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5+----
Mdrivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 17+++++++++++++----
Mdrivers/net/ethernet/netronome/nfp/flower/offload.c | 18++++++++++++------
Mdrivers/net/ethernet/realtek/8139cp.c | 5+++++
Mdrivers/net/ethernet/socionext/sni_ave.c | 24++++++++++++++----------
Mdrivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 23+++++++++++++----------
Mdrivers/net/macvlan.c | 7+++++--
Mdrivers/net/phy/phy_device.c | 19++++++++-----------
Mdrivers/net/phy/sfp-bus.c | 2+-
Mdrivers/net/tun.c | 9+++++----
Mdrivers/net/virtio_net.c | 14+++++++++-----
Mdrivers/net/wireless/mac80211_hwsim.c | 20+++++++++++---------
Mdrivers/vhost/vhost.c | 3---
Minclude/linux/filter.h | 7+++++++
Minclude/linux/sfp.h | 2+-
Minclude/net/neighbour.h | 28+++++++++++++++++++++++-----
Minclude/net/sctp/sctp.h | 5+++++
Minclude/net/sctp/structs.h | 2++
Minclude/uapi/linux/bpf.h | 56+++++++++++++++++++++++++++++++++++++-------------------
Mkernel/bpf/btf.c | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mkernel/bpf/verifier.c | 103++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
Mnet/bpf/test_run.c | 21+++++++++++++++------
Mnet/core/dev.c | 65+++++++++++++++++++++++++++++++++++------------------------------
Mnet/core/filter.c | 27++++++++++++++-------------
Mnet/core/rtnetlink.c | 3+++
Mnet/dsa/master.c | 34+++++++++++++++++++++++++++++++++-
Mnet/dsa/slave.c | 28----------------------------
Mnet/ipv4/ip_fragment.c | 7+++++++
Mnet/ipv4/ip_input.c | 4++--
Mnet/ipv4/tcp_output.c | 45++++++++++++++++++++++++++++++++-------------
Mnet/ipv4/tcp_timer.c | 10+++++-----
Mnet/ipv6/ip6_input.c | 4++--
Mnet/ipv6/ip6_output.c | 42+++++++++++++++++++++---------------------
Mnet/ipv6/netfilter/nf_conntrack_reasm.c | 8+++++++-
Mnet/ipv6/reassembly.c | 8+++++++-
Mnet/ipv6/seg6_iptunnel.c | 1+
Mnet/mac80211/cfg.c | 7++++---
Mnet/mac80211/iface.c | 2++
Mnet/mac80211/mlme.c | 12++++++++----
Mnet/mac80211/rx.c | 5+++--
Mnet/mac80211/status.c | 2++
Mnet/mac80211/tx.c | 4++--
Mnet/openvswitch/conntrack.c | 2+-
Mnet/sched/act_police.c | 24++++++++++++------------
Mnet/sched/cls_flower.c | 23++++++++++-------------
Mnet/sched/sch_netem.c | 3+++
Mnet/sctp/associola.c | 9+++++----
Mnet/sctp/chunk.c | 6++++++
Mnet/sctp/sm_make_chunk.c | 3+++
Mnet/sctp/socket.c | 3+--
Mnet/wireless/mlme.c | 4++--
Mnet/wireless/nl80211.c | 1+
Mnet/wireless/sme.c | 8+++++++-
Mnet/wireless/util.c | 2++
Mnet/x25/af_x25.c | 18+++++++++++-------
Mnet/x25/x25_in.c | 9+++++++++
Mtools/bpf/bpftool/btf_dumper.c | 6+++---
Mtools/include/uapi/linux/bpf.h | 56+++++++++++++++++++++++++++++++++++++-------------------
Mtools/testing/selftests/bpf/bpf_helpers.h | 4++--
Mtools/testing/selftests/bpf/test_btf.c | 375+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mtools/testing/selftests/bpf/test_sk_lookup_kern.c | 18+++++++++---------
Mtools/testing/selftests/bpf/test_verifier.c | 6+++---
83 files changed, 1225 insertions(+), 368 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-class-net-dsa b/Documentation/ABI/testing/sysfs-class-net-dsa @@ -1,4 +1,4 @@ -What: /sys/class/net/<iface>/tagging +What: /sys/class/net/<iface>/dsa/tagging Date: August 2018 KernelVersion: 4.20 Contact: netdev@vger.kernel.org diff --git a/MAINTAINERS b/MAINTAINERS @@ -13890,6 +13890,13 @@ F: drivers/md/raid* F: include/linux/raid/ F: include/uapi/linux/raid/ +SOCIONEXT (SNI) AVE NETWORK DRIVER +M: Kunihiko Hayashi <hayashi.kunihiko@socionext.com> +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/socionext/sni_ave.c +F: Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt + SOCIONEXT (SNI) NETSEC NETWORK DRIVER M: Jassi Brar <jaswinder.singh@linaro.org> L: netdev@vger.kernel.org diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c @@ -891,6 +891,55 @@ cond_branch: return 0; } +/* Fix the branch target addresses for subprog calls */ +static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) +{ + const struct bpf_insn *insn = fp->insnsi; + bool func_addr_fixed; + u64 func_addr; + u32 tmp_idx; + int i, ret; + + for (i = 0; i < fp->len; i++) { + /* + * During the extra pass, only the branch target addresses for + * the subprog calls need to be fixed. All other instructions + * can left untouched. + * + * The JITed image length does not change because we already + * ensure that the JITed instruction sequence for these calls + * are of fixed length by padding them with NOPs. + */ + if (insn[i].code == (BPF_JMP | BPF_CALL) && + insn[i].src_reg == BPF_PSEUDO_CALL) { + ret = bpf_jit_get_func_addr(fp, &insn[i], true, + &func_addr, + &func_addr_fixed); + if (ret < 0) + return ret; + + /* + * Save ctx->idx as this would currently point to the + * end of the JITed image and set it to the offset of + * the instruction sequence corresponding to the + * subprog call temporarily. + */ + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + /* + * Restore ctx->idx here. This is safe as the length + * of the JITed sequence remains unchanged. + */ + ctx->idx = tmp_idx; + } + } + + return 0; +} + struct powerpc64_jit_data { struct bpf_binary_header *header; u32 *addrs; @@ -989,6 +1038,22 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) skip_init_ctx: code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + if (extra_pass) { + /* + * Do not touch the prologue and epilogue as they will remain + * unchanged. Only fix the branch target address for subprog + * calls in the body. + * + * This does not change the offsets and lengths of the subprog + * call instruction sequences and hence, the size of the JITed + * image as well. + */ + bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + + /* There is no need to perform the usual passes. */ + goto skip_codegen_passes; + } + /* Code generation passes 1-2 */ for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ @@ -1002,6 +1067,7 @@ skip_init_ctx: proglen - (cgctx.idx * 4), cgctx.seen); } +skip_codegen_passes: if (bpf_jit_enable > 1) /* * Note that we output the base address of the code_base diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c @@ -2086,6 +2086,9 @@ void bond_3ad_unbind_slave(struct slave *slave) aggregator->aggregator_identifier); /* Tell the partner that this port is not suitable for aggregation */ + port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; + port->actor_oper_port_state &= ~AD_STATE_COLLECTING; + port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; port->actor_oper_port_state &= ~AD_STATE_AGGREGATION; __update_lacpdu_from_port(port); ad_lacpdu_send(port); diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c @@ -116,8 +116,7 @@ static int mv88e6060_switch_reset(struct dsa_switch *ds) /* Reset the switch. */ REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL, GLOBAL_ATU_CONTROL_SWRESET | - GLOBAL_ATU_CONTROL_ATUSIZE_1024 | - GLOBAL_ATU_CONTROL_ATE_AGE_5MIN); + GLOBAL_ATU_CONTROL_LEARNDIS); /* Wait up to one second for reset to complete. */ timeout = jiffies + 1 * HZ; @@ -142,13 +141,10 @@ static int mv88e6060_setup_global(struct dsa_switch *ds) */ REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, GLOBAL_CONTROL_MAX_FRAME_1536); - /* Enable automatic address learning, set the address - * database size to 1024 entries, and set the default aging - * time to 5 minutes. + /* Disable automatic address learning. */ REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL, - GLOBAL_ATU_CONTROL_ATUSIZE_1024 | - GLOBAL_ATU_CONTROL_ATE_AGE_5MIN); + GLOBAL_ATU_CONTROL_LEARNDIS); return 0; } diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -674,7 +674,7 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, rx_stat = (0x0000003CU & rxd_wb->status) >> 2; - is_rx_check_sum_enabled = (rxd_wb->type) & (0x3U << 19); + is_rx_check_sum_enabled = (rxd_wb->type >> 19) & 0x3U; pkt_type = 0xFFU & (rxd_wb->type >> 4); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5162,6 +5162,7 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp) cp = le16_to_cpu(resp->alloc_cmpl_rings); stats = le16_to_cpu(resp->alloc_stat_ctx); cp = min_t(u16, cp, stats); + hw_resc->resv_irqs = cp; if (bp->flags & BNXT_FLAG_CHIP_P5) { int rx = hw_resc->resv_rx_rings; int tx = hw_resc->resv_tx_rings; @@ -5175,7 +5176,7 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp) hw_resc->resv_rx_rings = rx; hw_resc->resv_tx_rings = tx; } - cp = le16_to_cpu(resp->alloc_msix); + hw_resc->resv_irqs = le16_to_cpu(resp->alloc_msix); hw_resc->resv_hw_ring_grps = rx; } hw_resc->resv_cp_rings = cp; @@ -5353,7 +5354,7 @@ static int bnxt_hwrm_reserve_rings(struct bnxt *bp, int tx, int rx, int grp, return bnxt_hwrm_reserve_vf_rings(bp, tx, rx, grp, cp, vnic); } -static int bnxt_cp_rings_in_use(struct bnxt *bp) +static int bnxt_nq_rings_in_use(struct bnxt *bp) { int cp = bp->cp_nr_rings; int ulp_msix, ulp_base; @@ -5368,10 +5369,22 @@ static int bnxt_cp_rings_in_use(struct bnxt *bp) return cp; } +static int bnxt_cp_rings_in_use(struct bnxt *bp) +{ + int cp; + + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + return bnxt_nq_rings_in_use(bp); + + cp = bp->tx_nr_rings + bp->rx_nr_rings; + return cp; +} + static bool bnxt_need_reserve_rings(struct bnxt *bp) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; int cp = bnxt_cp_rings_in_use(bp); + int nq = bnxt_nq_rings_in_use(bp); int rx = bp->rx_nr_rings; int vnic = 1, grp = rx; @@ -5387,7 +5400,7 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) rx <<= 1; if (BNXT_NEW_RM(bp) && (hw_resc->resv_rx_rings != rx || hw_resc->resv_cp_rings != cp || - hw_resc->resv_vnics != vnic || + hw_resc->resv_irqs < nq || hw_resc->resv_vnics != vnic || (hw_resc->resv_hw_ring_grps != grp && !(bp->flags & BNXT_FLAG_CHIP_P5)))) return true; @@ -5397,7 +5410,7 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) static int __bnxt_reserve_rings(struct bnxt *bp) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; - int cp = bnxt_cp_rings_in_use(bp); + int cp = bnxt_nq_rings_in_use(bp); int tx = bp->tx_nr_rings; int rx = bp->rx_nr_rings; int grp, rx_rings, rc; @@ -5422,7 +5435,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp) tx = hw_resc->resv_tx_rings; if (BNXT_NEW_RM(bp)) { rx = hw_resc->resv_rx_rings; - cp = hw_resc->resv_cp_rings; + cp = hw_resc->resv_irqs; grp = hw_resc->resv_hw_ring_grps; vnic = hw_resc->resv_vnics; } @@ -6292,6 +6305,8 @@ hwrm_func_qcaps_exit: return rc; } +static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp); + static int bnxt_hwrm_func_qcaps(struct bnxt *bp) { int rc; @@ -6299,6 +6314,11 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp) rc = __bnxt_hwrm_func_qcaps(bp); if (rc) return rc; + rc = bnxt_hwrm_queue_qportcfg(bp); + if (rc) { + netdev_err(bp->dev, "hwrm query qportcfg failure rc: %d\n", rc); + return rc; + } if (bp->hwrm_spec_code >= 0x10803) { rc = bnxt_alloc_ctx_mem(bp); if (rc) @@ -7026,7 +7046,12 @@ unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp) unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp) { - return bp->hw_resc.max_cp_rings - bnxt_get_ulp_msix_num(bp); + unsigned int cp = bp->hw_resc.max_cp_rings; + + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + cp -= bnxt_get_ulp_msix_num(bp); + + return cp; } static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp) @@ -7048,7 +7073,9 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num) int total_req = bp->cp_nr_rings + num; int max_idx, avail_msix; - max_idx = min_t(int, bp->total_irqs, max_cp); + max_idx = bp->total_irqs; + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + max_idx = min_t(int, bp->total_irqs, max_cp); avail_msix = max_idx - bp->cp_nr_rings; if (!BNXT_NEW_RM(bp) || avail_msix >= num) return avail_msix; @@ -7066,7 +7093,7 @@ static int bnxt_get_num_msix(struct bnxt *bp) if (!BNXT_NEW_RM(bp)) return bnxt_get_max_func_irqs(bp); - return bnxt_cp_rings_in_use(bp); + return bnxt_nq_rings_in_use(bp); } static int bnxt_init_msix(struct bnxt *bp) @@ -7794,6 +7821,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) rc = bnxt_hwrm_func_resc_qcaps(bp, true); hw_resc->resv_cp_rings = 0; + hw_resc->resv_irqs = 0; hw_resc->resv_tx_rings = 0; hw_resc->resv_rx_rings = 0; hw_resc->resv_hw_ring_grps = 0; @@ -9799,13 +9827,16 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, int *max_cp) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; - int max_ring_grps = 0; + int max_ring_grps = 0, max_irq; *max_tx = hw_resc->max_tx_rings; *max_rx = hw_resc->max_rx_rings; - *max_cp = min_t(int, bnxt_get_max_func_cp_rings_for_en(bp), - hw_resc->max_irqs - bnxt_get_ulp_msix_num(bp)); - *max_cp = min_t(int, *max_cp, hw_resc->max_stat_ctxs); + *max_cp = bnxt_get_max_func_cp_rings_for_en(bp); + max_irq = min_t(int, bnxt_get_max_func_irqs(bp) - + bnxt_get_ulp_msix_num(bp), + bnxt_get_max_func_stat_ctxs(bp)); + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + *max_cp = min_t(int, *max_cp, max_irq); max_ring_grps = hw_resc->max_hw_ring_grps; if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) { *max_cp -= 1; @@ -9813,6 +9844,11 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, } if (bp->flags & BNXT_FLAG_AGG_RINGS) *max_rx >>= 1; + if (bp->flags & BNXT_FLAG_CHIP_P5) { + bnxt_trim_rings(bp, max_rx, max_tx, *max_cp, false); + /* On P5 chips, max_cp output param should be available NQs */ + *max_cp = max_irq; + } *max_rx = min_t(int, *max_rx, max_ring_grps); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -928,6 +928,7 @@ struct bnxt_hw_resc { u16 min_stat_ctxs; u16 max_stat_ctxs; u16 max_irqs; + u16 resv_irqs; }; #if defined(CONFIG_BNXT_SRIOV) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -168,7 +168,7 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id, if (BNXT_NEW_RM(bp)) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; - avail_msix = hw_resc->resv_cp_rings - bp->cp_nr_rings; + avail_msix = hw_resc->resv_irqs - bp->cp_nr_rings; edev->ulp_tbl[ulp_id].msix_requested = avail_msix; } bnxt_fill_msix_vecs(bp, ent); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -111,7 +111,7 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = { "mac_tx_one_collision", "mac_tx_multi_collision", "mac_tx_max_collision_fail", - "mac_tx_max_deferal_fail", + "mac_tx_max_deferral_fail", "mac_tx_fifo_err", "mac_tx_runts", diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -349,13 +349,15 @@ lio_vf_rep_packet_sent_callback(struct octeon_device *oct, struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; struct sk_buff *skb = sc->ctxptr; struct net_device *ndev = skb->dev; + u32 iq_no; dma_unmap_single(&oct->pci_dev->dev, sc->dmadptr, sc->datasize, DMA_TO_DEVICE); dev_kfree_skb_any(skb); + iq_no = sc->iq_no; octeon_free_soft_command(oct, sc); - if (octnet_iq_is_full(oct, sc->iq_no)) + if (octnet_iq_is_full(oct, iq_no)) return; if (netif_queue_stopped(ndev)) diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c @@ -2786,7 +2786,7 @@ static struct fman *read_dts_node(struct platform_device *of_dev) if (!muram_node) { dev_err(&of_dev->dev, "%s: could not find MURAM node\n", __func__); - goto fman_node_put; + goto fman_free; } err = of_address_to_resource(muram_node, 0, @@ -2795,11 +2795,10 @@ static struct fman *read_dts_node(struct platform_device *of_dev) of_node_put(muram_node); dev_err(&of_dev->dev, "%s: of_address_to_resource() = %d\n", __func__, err); - goto fman_node_put; + goto fman_free; } of_node_put(muram_node); - of_node_put(fm_node); err = devm_request_irq(&of_dev->dev, irq, fman_irq, IRQF_SHARED, "fman", fman); diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h @@ -231,7 +231,7 @@ struct emac_regs { #define EMAC_STACR_PHYE 0x00004000 #define EMAC_STACR_STAC_MASK 0x00003000 #define EMAC_STACR_STAC_READ 0x00001000 -#define EMAC_STACR_STAC_WRITE 0x00000800 +#define EMAC_STACR_STAC_WRITE 0x00002000 #define EMAC_STACR_OPBC_MASK 0x00000C00 #define EMAC_STACR_OPBC_50 0x00000000 #define EMAC_STACR_OPBC_66 0x00000400 diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1859,7 +1859,7 @@ static int do_reset(struct ibmvnic_adapter *adapter, if (adapter->reset_reason != VNIC_RESET_FAILOVER && adapter->reset_reason != VNIC_RESET_CHANGE_PARAM) - netdev_notify_peers(netdev); + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev); netif_carrier_on(netdev); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4375,8 +4375,27 @@ static void mvpp2_phylink_validate(struct net_device *dev, unsigned long *supported, struct phylink_link_state *state) { + struct mvpp2_port *port = netdev_priv(dev); __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + /* Invalid combinations */ + switch (state->interface) { + case PHY_INTERFACE_MODE_10GKR: + case PHY_INTERFACE_MODE_XAUI: + if (port->gop_id != 0) + goto empty_set; + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + if (port->gop_id == 0) + goto empty_set; + break; + default: + break; + } + phylink_set(mask, Autoneg); phylink_set_port_modes(mask); phylink_set(mask, Pause); @@ -4384,6 +4403,8 @@ static void mvpp2_phylink_validate(struct net_device *dev, switch (state->interface) { case PHY_INTERFACE_MODE_10GKR: + case PHY_INTERFACE_MODE_XAUI: + case PHY_INTERFACE_MODE_NA: phylink_set(mask, 10000baseCR_Full); phylink_set(mask, 10000baseSR_Full); phylink_set(mask, 10000baseLR_Full); @@ -4391,7 +4412,11 @@ static void mvpp2_phylink_validate(struct net_device *dev, phylink_set(mask, 10000baseER_Full); phylink_set(mask, 10000baseKR_Full); /* Fall-through */ - default: + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_SGMII: phylink_set(mask, 10baseT_Half); phylink_set(mask, 10baseT_Full); phylink_set(mask, 100baseT_Half); @@ -4403,11 +4428,18 @@ static void mvpp2_phylink_validate(struct net_device *dev, phylink_set(mask, 1000baseT_Full); phylink_set(mask, 1000baseX_Full); phylink_set(mask, 2500baseX_Full); + break; + default: + goto empty_set; } bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); bitmap_and(state->advertising, state->advertising, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); + return; + +empty_set: + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); } static void mvpp22_xlg_link_state(struct mvpp2_port *port, diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -5,7 +5,7 @@ config MLX4_EN tristate "Mellanox Technologies 1/10/40Gbit Ethernet support" depends on MAY_USE_DEVLINK - depends on PCI + depends on PCI && NETDEVICES && ETHERNET && INET select MLX4_CORE imply PTP_1588_CLOCK ---help--- diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1084,8 +1084,8 @@ static int mlx4_en_set_pauseparam(struct net_device *dev, tx_pause = !!(pause->tx_pause); rx_pause = !!(pause->rx_pause); - rx_ppp = priv->prof->rx_ppp && !(tx_pause || rx_pause); - tx_ppp = priv->prof->tx_ppp && !(tx_pause || rx_pause); + rx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->rx_ppp; + tx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->tx_ppp; err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_skb_size + ETH_FCS_LEN, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3493,8 +3493,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } - /* MTU range: 46 - hw-specific max */ - dev->min_mtu = MLX4_EN_MIN_MTU; + /* MTU range: 68 - hw-specific max */ + dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = priv->max_mtu; mdev->pndev[port] = dev; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -161,7 +161,6 @@ #define MLX4_SELFTEST_LB_MIN_MTU (MLX4_LOOPBACK_TEST_PAYLOAD + NET_IP_ALIGN + \ ETH_HLEN + PREAMBLE_LEN) -#define MLX4_EN_MIN_MTU 46 /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -724,9 +724,9 @@ static u32 mlx5e_get_fcs(const struct sk_buff *skb) return __get_unaligned_cpu32(fcs_bytes); } -static u8 get_ip_proto(struct sk_buff *skb, __be16 proto) +static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) { - void *ip_p = skb->data + sizeof(struct ethhdr); + void *ip_p = skb->data + network_depth; return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol : ((struct ipv6hdr *)ip_p)->nexthdr; @@ -755,7 +755,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, goto csum_unnecessary; if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { - if (unlikely(get_ip_proto(skb, proto) == IPPROTO_SCTP)) + if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP)) goto csum_unnecessary; skb->ip_summed = CHECKSUM_COMPLETE; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -560,7 +560,7 @@ static void mlxsw_sp_nve_mc_list_ip_del(struct mlxsw_sp *mlxsw_sp, mc_record = mlxsw_sp_nve_mc_record_find(mc_list, proto, addr, &mc_entry); - if (WARN_ON(!mc_record)) + if (!mc_record) return; mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry); @@ -647,7 +647,7 @@ void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, key.fid_index = mlxsw_sp_fid_index(fid); mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key); - if (WARN_ON(!mc_list)) + if (!mc_list) return; mlxsw_sp_nve_fid_flood_index_clear(fid, mc_list); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1275,15 +1275,12 @@ mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp, { u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN; enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt; - struct net_device *ipip_ul_dev; if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto) return false; - ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip, - ul_tb_id, ipip_entry) && - (!ipip_ul_dev || ipip_ul_dev == ul_dev); + ul_tb_id, ipip_entry); } /* Given decap parameters, find the corresponding IPIP entry. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -296,7 +296,13 @@ static bool mlxsw_sp_bridge_port_should_destroy(const struct mlxsw_sp_bridge_port * bridge_port) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_port->dev); + struct net_device *dev = bridge_port->dev; + struct mlxsw_sp *mlxsw_sp; + + if (is_vlan_dev(dev)) + mlxsw_sp = mlxsw_sp_lower_get(vlan_dev_real_dev(dev)); + else + mlxsw_sp = mlxsw_sp_lower_get(dev); /* In case ports were pulled from out of a bridged LAG, then * it's possible the reference count isn't zero, yet the bridge @@ -2109,7 +2115,7 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device, vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1; mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); - if (WARN_ON(!mlxsw_sp_port_vlan)) + if (!mlxsw_sp_port_vlan) return; mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); @@ -2134,8 +2140,10 @@ mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, if (!fid) return -EINVAL; - if (mlxsw_sp_fid_vni_is_set(fid)) - return -EINVAL; + if (mlxsw_sp_fid_vni_is_set(fid)) { + err = -EINVAL; + goto err_vni_exists; + } err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, &params, extack); if (err) @@ -2149,6 +2157,7 @@ mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, return 0; err_nve_fid_enable: +err_vni_exists: mlxsw_sp_fid_put(fid); return err; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -476,16 +476,16 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, if (err) goto err_destroy_flow; - err = nfp_flower_xmit_flow(netdev, flow_pay, - NFP_FLOWER_CMSG_TYPE_FLOW_ADD); - if (err) - goto err_destroy_flow; - flow_pay->tc_flower_cookie = flow->cookie; err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node, nfp_flower_table_params); if (err) - goto err_destroy_flow; + goto err_release_metadata; + + err = nfp_flower_xmit_flow(netdev, flow_pay, + NFP_FLOWER_CMSG_TYPE_FLOW_ADD); + if (err) + goto err_remove_rhash; port->tc_offload_cnt++; @@ -494,6 +494,12 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, return 0; +err_remove_rhash: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &flow_pay->fl_node, + nfp_flower_table_params)); +err_release_metadata: + nfp_modify_flow_metadata(app, flow_pay); err_destroy_flow: kfree(flow_pay->action_data); kfree(flow_pay->mask_data); diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c @@ -571,6 +571,7 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) struct cp_private *cp; int handled = 0; u16 status; + u16 mask; if (unlikely(dev == NULL)) return IRQ_NONE; @@ -578,6 +579,10 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) spin_lock(&cp->lock); + mask = cpr16(IntrMask); + if (!mask) + goto out_unlock; + status = cpr16(IntrStatus); if (!status || (status == 0xFFFF)) goto out_unlock; diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c @@ -185,8 +185,8 @@ NETIF_MSG_TX_ERR) /* Parameter for descriptor */ -#define AVE_NR_TXDESC 32 /* Tx descriptor */ -#define AVE_NR_RXDESC 64 /* Rx descriptor */ +#define AVE_NR_TXDESC 64 /* Tx descriptor */ +#define AVE_NR_RXDESC 256 /* Rx descriptor */ #define AVE_DESC_OFS_CMDSTS 0 #define AVE_DESC_OFS_ADDRL 4 @@ -194,6 +194,7 @@ /* Parameter for ethernet frame */ #define AVE_MAX_ETHFRAME 1518 +#define AVE_FRAME_HEADROOM 2 /* Parameter for interrupt */ #define AVE_INTM_COUNT 20 @@ -576,12 +577,13 @@ static int ave_rxdesc_prepare(struct net_device *ndev, int entry) skb = priv->rx.desc[entry].skbs; if (!skb) { - skb = netdev_alloc_skb_ip_align(ndev, - AVE_MAX_ETHFRAME); + skb = netdev_alloc_skb(ndev, AVE_MAX_ETHFRAME); if (!skb) { netdev_err(ndev, "can't allocate skb for Rx\n"); return -ENOMEM; } + skb->data += AVE_FRAME_HEADROOM; + skb->tail += AVE_FRAME_HEADROOM; } /* set disable to cmdsts */ @@ -594,12 +596,12 @@ static int ave_rxdesc_prepare(struct net_device *ndev, int entry) * - Rx buffer begins with 2 byte headroom, and data will be put from * (buffer + 2). * To satisfy this, specify the address to put back the buffer - * pointer advanced by NET_IP_ALIGN by netdev_alloc_skb_ip_align(), - * and expand the map size by NET_IP_ALIGN. + * pointer advanced by AVE_FRAME_HEADROOM, and expand the map size + * by AVE_FRAME_HEADROOM. */ ret = ave_dma_map(ndev, &priv->rx.desc[entry], - skb->data - NET_IP_ALIGN, - AVE_MAX_ETHFRAME + NET_IP_ALIGN, + skb->data - AVE_FRAME_HEADROOM, + AVE_MAX_ETHFRAME + AVE_FRAME_HEADROOM, DMA_FROM_DEVICE, &paddr); if (ret) { netdev_err(ndev, "can't map skb for Rx\n"); @@ -1689,9 +1691,10 @@ static int ave_probe(struct platform_device *pdev) pdev->name, pdev->id); /* Register as a NAPI supported driver */ - netif_napi_add(ndev, &priv->napi_rx, ave_napi_poll_rx, priv->rx.ndesc); + netif_napi_add(ndev, &priv->napi_rx, ave_napi_poll_rx, + NAPI_POLL_WEIGHT); netif_tx_napi_add(ndev, &priv->napi_tx, ave_napi_poll_tx, - priv->tx.ndesc); + NAPI_POLL_WEIGHT); platform_set_drvdata(pdev, ndev); @@ -1913,5 +1916,6 @@ static struct platform_driver ave_driver = { }; module_platform_driver(ave_driver); +MODULE_AUTHOR("Kunihiko Hayashi <hayashi.kunihiko@socionext.com>"); MODULE_DESCRIPTION("Socionext UniPhier AVE ethernet driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2550,12 +2550,6 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) netdev_warn(priv->dev, "PTP init failed\n"); } -#ifdef CONFIG_DEBUG_FS - ret = stmmac_init_fs(dev); - if (ret < 0) - netdev_warn(priv->dev, "%s: failed debugFS registration\n", - __func__); -#endif priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS; if (priv->use_riwt) { @@ -2756,10 +2750,6 @@ static int stmmac_release(struct net_device *dev) netif_carrier_off(dev); -#ifdef CONFIG_DEBUG_FS - stmmac_exit_fs(dev); -#endif - stmmac_release_ptp(priv); return 0; @@ -3899,6 +3889,9 @@ static int stmmac_sysfs_ring_read(struct seq_file *seq, void *v) u32 tx_count = priv->plat->tx_queues_to_use; u32 queue; + if ((dev->flags & IFF_UP) == 0) + return 0; + for (queue = 0; queue < rx_count; queue++) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; @@ -4397,6 +4390,13 @@ int stmmac_dvr_probe(struct device *device, goto error_netdev_register; } +#ifdef CONFIG_DEBUG_FS + ret = stmmac_init_fs(ndev); + if (ret < 0) + netdev_warn(priv->dev, "%s: failed debugFS registration\n", + __func__); +#endif + return ret; error_netdev_register: @@ -4432,6 +4432,9 @@ int stmmac_dvr_remove(struct device *dev) netdev_info(priv->dev, "%s: removing driver", __func__); +#ifdef CONFIG_DEBUG_FS + stmmac_exit_fs(ndev); +#endif stmmac_stop_all_dma(priv); stmmac_mac_set(priv, priv->ioaddr, false); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c @@ -608,7 +608,7 @@ static int macvlan_open(struct net_device *dev) goto hash_add; } - err = -EBUSY; + err = -EADDRINUSE; if (macvlan_addr_busy(vlan->port, dev->dev_addr)) goto out; @@ -706,7 +706,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr) } else { /* Rehash and update the device filters */ if (macvlan_addr_busy(vlan->port, addr)) - return -EBUSY; + return -EADDRINUSE; if (!macvlan_passthru(port)) { err = dev_uc_add(lowerdev, addr); @@ -747,6 +747,9 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p) return dev_set_mac_address(vlan->lowerdev, addr); } + if (macvlan_addr_busy(vlan->port, addr->sa_data)) + return -EADDRINUSE; + return macvlan_sync_address(dev, addr->sa_data); } diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c @@ -1880,20 +1880,17 @@ EXPORT_SYMBOL(genphy_loopback); static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) { - phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES | - PHY_10BT_FEATURES); - switch (max_speed) { - default: - return -ENOTSUPP; - case SPEED_1000: - phydev->supported |= PHY_1000BT_FEATURES; + case SPEED_10: + phydev->supported &= ~PHY_100BT_FEATURES; /* fall through */ case SPEED_100: - phydev->supported |= PHY_100BT_FEATURES; - /* fall through */ - case SPEED_10: - phydev->supported |= PHY_10BT_FEATURES; + phydev->supported &= ~PHY_1000BT_FEATURES; + break; + case SPEED_1000: + break; + default: + return -ENOTSUPP; } return 0; diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c @@ -162,7 +162,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, /* 1000Base-PX or 1000Base-BX10 */ if ((id->base.e_base_px || id->base.e_base_bx10) && br_min <= 1300 && br_max >= 1200) - phylink_set(support, 1000baseX_Full); + phylink_set(modes, 1000baseX_Full); /* For active or passive cables, select the link modes * based on the bit rates and the cable compliance bytes. diff --git a/drivers/net/tun.c b/drivers/net/tun.c @@ -2293,9 +2293,9 @@ static void tun_setup(struct net_device *dev) static int tun_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - if (!data) - return 0; - return -EINVAL; + NL_SET_ERR_MSG(extack, + "tun/tap creation via rtnetlink is not supported."); + return -EOPNOTSUPP; } static size_t tun_get_size(const struct net_device *dev) @@ -2385,6 +2385,7 @@ static int tun_xdp_one(struct tun_struct *tun, struct tun_file *tfile, struct xdp_buff *xdp, int *flush) { + unsigned int datasize = xdp->data_end - xdp->data; struct tun_xdp_hdr *hdr = xdp->data_hard_start; struct virtio_net_hdr *gso = &hdr->gso; struct tun_pcpu_stats *stats; @@ -2461,7 +2462,7 @@ build: stats = get_cpu_ptr(tun->pcpu_stats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; - stats->rx_bytes += skb->len; + stats->rx_bytes += datasize; u64_stats_update_end(&stats->syncp); put_cpu_ptr(stats); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c @@ -365,7 +365,8 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, struct page *page, unsigned int offset, - unsigned int len, unsigned int truesize) + unsigned int len, unsigned int truesize, + bool hdr_valid) { struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -387,7 +388,8 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, else hdr_padded_len = sizeof(struct padded_vnet_hdr); - memcpy(hdr, p, hdr_len); + if (hdr_valid) + memcpy(hdr, p, hdr_len); len -= hdr_len; offset += hdr_padded_len; @@ -739,7 +741,8 @@ static struct sk_buff *receive_big(struct net_device *dev, struct virtnet_rq_stats *stats) { struct page *page = buf; - struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); + struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, + PAGE_SIZE, true); stats->bytes += len - vi->hdr_len; if (unlikely(!skb)) @@ -842,7 +845,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_unlock(); put_page(page); head_skb = page_to_skb(vi, rq, xdp_page, - offset, len, PAGE_SIZE); + offset, len, + PAGE_SIZE, false); return head_skb; } break; @@ -898,7 +902,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto err_skb; } - head_skb = page_to_skb(vi, rq, page, offset, len, truesize); + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); curr_skb = head_skb; if (unlikely(!curr_skb)) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c @@ -2884,6 +2884,10 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); + tasklet_hrtimer_init(&data->beacon_timer, + mac80211_hwsim_beacon, + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + err = ieee80211_register_hw(hw); if (err < 0) { pr_debug("mac80211_hwsim: ieee80211_register_hw failed (%d)\n", @@ -2908,10 +2912,6 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, data->debugfs, data, &hwsim_simulate_radar); - tasklet_hrtimer_init(&data->beacon_timer, - mac80211_hwsim_beacon, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - spin_lock_bh(&hwsim_radio_lock); err = rhashtable_insert_fast(&hwsim_radios_rht, &data->rht, hwsim_rht_params); @@ -3703,16 +3703,16 @@ static int __init init_mac80211_hwsim(void) if (err) goto out_unregister_pernet; + err = hwsim_init_netlink(); + if (err) + goto out_unregister_driver; + hwsim_class = class_create(THIS_MODULE, "mac80211_hwsim"); if (IS_ERR(hwsim_class)) { err = PTR_ERR(hwsim_class); - goto out_unregister_driver; + goto out_exit_netlink; } - err = hwsim_init_netlink(); - if (err < 0) - goto out_unregister_driver; - for (i = 0; i < radios; i++) { struct hwsim_new_radio_params param = { 0 }; @@ -3818,6 +3818,8 @@ out_free_mon: free_netdev(hwsim_mon); out_free_radios: mac80211_hwsim_free(); +out_exit_netlink: + hwsim_exit_netlink(); out_unregister_driver: platform_driver_unregister(&mac80211_hwsim_driver); out_unregister_pernet: diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c @@ -944,10 +944,7 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d, if (msg->iova <= vq_msg->iova && msg->iova + msg->size - 1 >= vq_msg->iova && vq_msg->type == VHOST_IOTLB_MISS) { - mutex_lock(&node->vq->mutex); vhost_poll_queue(&node->vq->poll); - mutex_unlock(&node->vq->mutex); - list_del(&node->node); kfree(node); } diff --git a/include/linux/filter.h b/include/linux/filter.h @@ -449,6 +449,13 @@ struct sock_reuseport; offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 #define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \ offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1 +#if BITS_PER_LONG == 64 +# define bpf_ctx_range_ptr(TYPE, MEMBER) \ + offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 +#else +# define bpf_ctx_range_ptr(TYPE, MEMBER) \ + offsetof(TYPE, MEMBER) ... offsetof(TYPE, MEMBER) + 8 - 1 +#endif /* BITS_PER_LONG == 64 */ #define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \ ({ \ diff --git a/include/linux/sfp.h b/include/linux/sfp.h @@ -224,7 +224,7 @@ struct sfp_eeprom_ext { * * See the SFF-8472 specification and related documents for the definition * of these structure members. This can be obtained from - * ftp://ftp.seagate.com/sff + * https://www.snia.org/technology-communities/sff/specifications */ struct sfp_eeprom_id { struct sfp_eeprom_base base; diff --git a/include/net/neighbour.h b/include/net/neighbour.h @@ -454,6 +454,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { + unsigned int hh_alen = 0; unsigned int seq; unsigned int hh_len; @@ -461,16 +462,33 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb seq = read_seqbegin(&hh->hh_lock); hh_len = hh->hh_len; if (likely(hh_len <= HH_DATA_MOD)) { - /* this is inlined by gcc */ - memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); + hh_alen = HH_DATA_MOD; + + /* skb_push() would proceed silently if we have room for + * the unaligned size but not for the aligned size: + * check headroom explicitly. + */ + if (likely(skb_headroom(skb) >= HH_DATA_MOD)) { + /* this is inlined by gcc */ + memcpy(skb->data - HH_DATA_MOD, hh->hh_data, + HH_DATA_MOD); + } } else { - unsigned int hh_alen = HH_DATA_ALIGN(hh_len); + hh_alen = HH_DATA_ALIGN(hh_len); - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); + if (likely(skb_headroom(skb) >= hh_alen)) { + memcpy(skb->data - hh_alen, hh->hh_data, + hh_alen); + } } } while (read_seqretry(&hh->hh_lock, seq)); - skb_push(skb, hh_len); + if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + __skb_push(skb, hh_len); return dev_queue_xmit(skb); } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h @@ -620,4 +620,9 @@ static inline bool sctp_transport_pmtu_check(struct sctp_transport *t) return false; } +static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize) +{ + return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize); +} + #endif /* __net_sctp_h__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h @@ -2075,6 +2075,8 @@ struct sctp_association { __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + + struct rcu_head rcu; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h @@ -2170,7 +2170,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, @@ -2187,12 +2187,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2201,8 +2203,10 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, the *struct bpf_sock* + * result is from reuse->socks[] using the hash of the tuple. * - * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, @@ -2219,12 +2223,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2233,6 +2239,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, the *struct bpf_sock* + * result is from reuse->socks[] using the hash of the tuple. * * int bpf_sk_release(struct bpf_sock *sk) * Description @@ -2405,6 +2413,9 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Current network namespace */ +#define BPF_F_CURRENT_NETNS (-1L) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, @@ -2422,6 +2433,12 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_SEG6_INLINE }; +#define __bpf_md_ptr(type, name) \ +union { \ + type name; \ + __u64 :64; \ +} __attribute__((aligned(8))) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -2456,7 +2473,7 @@ struct __sk_buff { /* ... here. */ __u32 data_meta; - struct bpf_flow_keys *flow_keys; + __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); }; struct bpf_tunnel_key { @@ -2572,8 +2589,8 @@ enum sk_action { * be added to the end of this structure */ struct sk_msg_md { - void *data; - void *data_end; + __bpf_md_ptr(void *, data); + __bpf_md_ptr(void *, data_end); __u32 family; __u32 remote_ip4; /* Stored in network byte order */ @@ -2589,8 +2606,9 @@ struct sk_reuseport_md { * Start of directly accessible data. It begins from * the tcp/udp header. */ - void *data; - void *data_end; /* End of directly accessible data */ + __bpf_md_ptr(void *, data); + /* End of directly accessible data */ + __bpf_md_ptr(void *, data_end); /* * Total length of packet (starting from the tcp/udp header). * Note that the directly accessible bytes (data_end - data) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c @@ -5,6 +5,7 @@ #include <uapi/linux/types.h> #include <linux/seq_file.h> #include <linux/compiler.h> +#include <linux/ctype.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/anon_inodes.h> @@ -426,6 +427,30 @@ static bool btf_name_offset_valid(const struct btf *btf, u32 offset) offset < btf->hdr.str_len; } +/* Only C-style identifier is permitted. This can be relaxed if + * necessary. + */ +static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) +{ + /* offset must be valid */ + const char *src = &btf->strings[offset]; + const char *src_limit; + + if (!isalpha(*src) && *src != '_') + return false; + + /* set a limit on identifier length */ + src_limit = src + KSYM_NAME_LEN; + src++; + while (*src && src < src_limit) { + if (!isalnum(*src) && *src != '_') + return false; + src++; + } + + return !*src; +} + static const char *btf_name_by_offset(const struct btf *btf, u32 offset) { if (!offset) @@ -1143,6 +1168,22 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, return -EINVAL; } + /* typedef type must have a valid name, and other ref types, + * volatile, const, restrict, should have a null name. + */ + if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF) { + if (!t->name_off || + !btf_name_valid_identifier(env->btf, t->name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + } else { + if (t->name_off) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + } + btf_verifier_log_type(env, t, NULL); return 0; @@ -1300,6 +1341,13 @@ static s32 btf_fwd_check_meta(struct btf_verifier_env *env, return -EINVAL; } + /* fwd type must have a valid name */ + if (!t->name_off || + !btf_name_valid_identifier(env->btf, t->name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + btf_verifier_log_type(env, t, NULL); return 0; @@ -1356,6 +1404,12 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env, return -EINVAL; } + /* array type should not have a name */ + if (t->name_off) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + if (btf_type_vlen(t)) { btf_verifier_log_type(env, t, "vlen != 0"); return -EINVAL; @@ -1532,6 +1586,13 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, return -EINVAL; } + /* struct type either no name or a valid one */ + if (t->name_off && + !btf_name_valid_identifier(env->btf, t->name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + btf_verifier_log_type(env, t, NULL); last_offset = 0; @@ -1543,6 +1604,12 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, return -EINVAL; } + /* struct member either no name or a valid one */ + if (member->name_off && + !btf_name_valid_identifier(btf, member->name_off)) { + btf_verifier_log_member(env, t, member, "Invalid name"); + return -EINVAL; + } /* A member cannot be in type void */ if (!member->type || !BTF_TYPE_ID_VALID(member->type)) { btf_verifier_log_member(env, t, member, @@ -1730,6 +1797,13 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, return -EINVAL; } + /* enum type either no name or a valid one */ + if (t->name_off && + !btf_name_valid_identifier(env->btf, t->name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + btf_verifier_log_type(env, t, NULL); for (i = 0; i < nr_enums; i++) { @@ -1739,6 +1813,14 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, return -EINVAL; } + /* enum member must have a valid name */ + if (!enums[i].name_off || + !btf_name_valid_identifier(btf, enums[i].name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + + btf_verifier_log(env, "\t%s val=%d\n", btf_name_by_offset(btf, enums[i].name_off), enums[i].val); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c @@ -175,6 +175,7 @@ struct bpf_verifier_stack_elem { #define BPF_COMPLEXITY_LIMIT_INSNS 131072 #define BPF_COMPLEXITY_LIMIT_STACK 1024 +#define BPF_COMPLEXITY_LIMIT_STATES 64 #define BPF_MAP_PTR_UNPRIV 1UL #define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \ @@ -3751,6 +3752,79 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, } } +/* compute branch direction of the expression "if (reg opcode val) goto target;" + * and return: + * 1 - branch will be taken and "goto target" will be executed + * 0 - branch will not be taken and fall-through to next insn + * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10] + */ +static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) +{ + if (__is_pointer_value(false, reg)) + return -1; + + switch (opcode) { + case BPF_JEQ: + if (tnum_is_const(reg->var_off)) + return !!tnum_equals_const(reg->var_off, val); + break; + case BPF_JNE: + if (tnum_is_const(reg->var_off)) + return !tnum_equals_const(reg->var_off, val); + break; + case BPF_JGT: + if (reg->umin_value > val) + return 1; + else if (reg->umax_value <= val) + return 0; + break; + case BPF_JSGT: + if (reg->smin_value > (s64)val) + return 1; + else if (reg->smax_value < (s64)val) + return 0; + break; + case BPF_JLT: + if (reg->umax_value < val) + return 1; + else if (reg->umin_value >= val) + return 0; + break; + case BPF_JSLT: + if (reg->smax_value < (s64)val) + return 1; + else if (reg->smin_value >= (s64)val) + return 0; + break; + case BPF_JGE: + if (reg->umin_value >= val) + return 1; + else if (reg->umax_value < val) + return 0; + break; + case BPF_JSGE: + if (reg->smin_value >= (s64)val) + return 1; + else if (reg->smax_value < (s64)val) + return 0; + break; + case BPF_JLE: + if (reg->umax_value <= val) + return 1; + else if (reg->umin_value > val) + return 0; + break; + case BPF_JSLE: + if (reg->smax_value <= (s64)val) + return 1; + else if (reg->smin_value > (s64)val) + return 0; + break; + } + + return -1; +} + /* Adjusts the register min/max values in the case that the dst_reg is the * variable register that we are working on, and src_reg is a constant or we're * simply doing a BPF_K check. @@ -4152,21 +4226,15 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, dst_reg = &regs[insn->dst_reg]; - /* detect if R == 0 where R was initialized to zero earlier */ - if (BPF_SRC(insn->code) == BPF_K && - (opcode == BPF_JEQ || opcode == BPF_JNE) && - dst_reg->type == SCALAR_VALUE && - tnum_is_const(dst_reg->var_off)) { - if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) || - (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) { - /* if (imm == imm) goto pc+off; - * only follow the goto, ignore fall-through - */ + if (BPF_SRC(insn->code) == BPF_K) { + int pred = is_branch_taken(dst_reg, insn->imm, opcode); + + if (pred == 1) { + /* only follow the goto, ignore fall-through */ *insn_idx += insn->off; return 0; - } else { - /* if (imm != imm) goto pc+off; - * only follow fall-through branch, since + } else if (pred == 0) { + /* only follow fall-through branch, since * that's where the program will go */ return 0; @@ -4980,7 +5048,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl; struct bpf_verifier_state *cur = env->cur_state, *new; - int i, j, err; + int i, j, err, states_cnt = 0; sl = env->explored_states[insn_idx]; if (!sl) @@ -5007,8 +5075,12 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 1; } sl = sl->next; + states_cnt++; } + if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) + return 0; + /* there were no equivalent states, remember current one. * technically the current state is not proven to be safe yet, * but it will either reach outer most bpf_exit (which means it's safe) @@ -5148,6 +5220,9 @@ static int do_check(struct bpf_verifier_env *env) goto process_bpf_exit; } + if (signal_pending(current)) + return -EAGAIN; + if (need_resched()) cond_resched(); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c @@ -28,12 +28,13 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx, return ret; } -static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) +static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret, + u32 *time) { struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 }; enum bpf_cgroup_storage_type stype; u64 time_start, time_spent = 0; - u32 ret = 0, i; + u32 i; for_each_cgroup_storage_type(stype) { storage[stype] = bpf_cgroup_storage_alloc(prog, stype); @@ -49,7 +50,7 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) repeat = 1; time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { - ret = bpf_test_run_one(prog, ctx, storage); + *ret = bpf_test_run_one(prog, ctx, storage); if (need_resched()) { if (signal_pending(current)) break; @@ -65,7 +66,7 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) for_each_cgroup_storage_type(stype) bpf_cgroup_storage_free(storage[stype]); - return ret; + return 0; } static int bpf_test_finish(const union bpf_attr *kattr, @@ -165,7 +166,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, __skb_push(skb, hh_len); if (is_direct_pkt_access) bpf_compute_data_pointers(skb); - retval = bpf_test_run(prog, skb, repeat, &duration); + ret = bpf_test_run(prog, skb, repeat, &retval, &duration); + if (ret) { + kfree_skb(skb); + kfree(sk); + return ret; + } if (!is_l2) { if (skb_headroom(skb) < hh_len) { int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); @@ -212,11 +218,14 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); xdp.rxq = &rxqueue->xdp_rxq; - retval = bpf_test_run(prog, &xdp, repeat, &duration); + ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration); + if (ret) + goto out; if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN || xdp.data_end != xdp.data + size) size = xdp.data_end - xdp.data; ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration); +out: kfree(data); return ret; } diff --git a/net/core/dev.c b/net/core/dev.c @@ -2175,6 +2175,20 @@ static bool remove_xps_queue_cpu(struct net_device *dev, return active; } +static void reset_xps_maps(struct net_device *dev, + struct xps_dev_maps *dev_maps, + bool is_rxqs_map) +{ + if (is_rxqs_map) { + static_key_slow_dec_cpuslocked(&xps_rxqs_needed); + RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); + } else { + RCU_INIT_POINTER(dev->xps_cpus_map, NULL); + } + static_key_slow_dec_cpuslocked(&xps_needed); + kfree_rcu(dev_maps, rcu); +} + static void clean_xps_maps(struct net_device *dev, const unsigned long *mask, struct xps_dev_maps *dev_maps, unsigned int nr_ids, u16 offset, u16 count, bool is_rxqs_map) @@ -2186,18 +2200,15 @@ static void clean_xps_maps(struct net_device *dev, const unsigned long *mask, j < nr_ids;) active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count); - if (!active) { - if (is_rxqs_map) { - RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); - } else { - RCU_INIT_POINTER(dev->xps_cpus_map, NULL); + if (!active) + reset_xps_maps(dev, dev_maps, is_rxqs_map); - for (i = offset + (count - 1); count--; i--) - netdev_queue_numa_node_write( - netdev_get_tx_queue(dev, i), - NUMA_NO_NODE); + if (!is_rxqs_map) { + for (i = offset + (count - 1); count--; i--) { + netdev_queue_numa_node_write( + netdev_get_tx_queue(dev, i), + NUMA_NO_NODE); } - kfree_rcu(dev_maps, rcu); } } @@ -2234,10 +2245,6 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset, false); out_no_maps: - if (static_key_enabled(&xps_rxqs_needed)) - static_key_slow_dec_cpuslocked(&xps_rxqs_needed); - - static_key_slow_dec_cpuslocked(&xps_needed); mutex_unlock(&xps_map_mutex); cpus_read_unlock(); } @@ -2355,9 +2362,12 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, if (!new_dev_maps) goto out_no_new_maps; - static_key_slow_inc_cpuslocked(&xps_needed); - if (is_rxqs_map) - static_key_slow_inc_cpuslocked(&xps_rxqs_needed); + if (!dev_maps) { + /* Increment static keys at most once per type */ + static_key_slow_inc_cpuslocked(&xps_needed); + if (is_rxqs_map) + static_key_slow_inc_cpuslocked(&xps_rxqs_needed); + } for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), j < nr_ids;) { @@ -2455,13 +2465,8 @@ out_no_new_maps: } /* free map if not active */ - if (!active) { - if (is_rxqs_map) - RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); - else - RCU_INIT_POINTER(dev->xps_cpus_map, NULL); - kfree_rcu(dev_maps, rcu); - } + if (!active) + reset_xps_maps(dev, dev_maps, is_rxqs_map); out_no_maps: mutex_unlock(&xps_map_mutex); @@ -5009,7 +5014,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo struct net_device *orig_dev = skb->dev; struct packet_type *pt_prev = NULL; - list_del(&skb->list); + skb_list_del_init(skb); __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); if (!pt_prev) continue; @@ -5165,7 +5170,7 @@ static void netif_receive_skb_list_internal(struct list_head *head) INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { net_timestamp_check(netdev_tstamp_prequeue, skb); - list_del(&skb->list); + skb_list_del_init(skb); if (!skb_defer_rx_timestamp(skb)) list_add_tail(&skb->list, &sublist); } @@ -5176,7 +5181,7 @@ static void netif_receive_skb_list_internal(struct list_head *head) rcu_read_lock(); list_for_each_entry_safe(skb, next, head, list) { xdp_prog = rcu_dereference(skb->dev->xdp_prog); - list_del(&skb->list); + skb_list_del_init(skb); if (do_xdp_generic(xdp_prog, skb) == XDP_PASS) list_add_tail(&skb->list, &sublist); } @@ -5195,7 +5200,7 @@ static void netif_receive_skb_list_internal(struct list_head *head) if (cpu >= 0) { /* Will be handled, remove from list */ - list_del(&skb->list); + skb_list_del_init(skb); enqueue_to_backlog(skb, cpu, &rflow->last_qtail); } } @@ -6204,8 +6209,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->skb = NULL; napi->poll = poll; if (weight > NAPI_POLL_WEIGHT) - pr_err_once("netif_napi_add() called with weight %d on device %s\n", - weight, dev->name); + netdev_err_once(dev, "%s() called with weight %d\n", __func__, + weight); napi->weight = weight; list_add(&napi->dev_list, &dev->napi_list); napi->dev = dev; diff --git a/net/core/filter.c b/net/core/filter.c @@ -4890,22 +4890,23 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, struct net *net; family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6; - if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags)) + if (unlikely(family == AF_UNSPEC || flags || + !((s32)netns_id < 0 || netns_id <= S32_MAX))) goto out; if (skb->dev) caller_net = dev_net(skb->dev); else caller_net = sock_net(skb->sk); - if (netns_id) { + if ((s32)netns_id < 0) { + net = caller_net; + sk = sk_lookup(net, tuple, skb, family, proto); + } else { net = get_net_ns_by_id(caller_net, netns_id); if (unlikely(!net)) goto out; sk = sk_lookup(net, tuple, skb, family, proto); put_net(net); - } else { - net = caller_net; - sk = sk_lookup(net, tuple, skb, family, proto); } if (sk) @@ -5435,8 +5436,8 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type if (size != size_default) return false; break; - case bpf_ctx_range(struct __sk_buff, flow_keys): - if (size != sizeof(struct bpf_flow_keys *)) + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): + if (size != sizeof(__u64)) return false; break; default: @@ -5464,7 +5465,7 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; } @@ -5489,7 +5490,7 @@ static bool cg_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): return false; case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_end): @@ -5530,7 +5531,7 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): return false; } @@ -5756,7 +5757,7 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; } @@ -5958,7 +5959,7 @@ static bool sk_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): return false; } @@ -6039,7 +6040,7 @@ static bool flow_dissector_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): info->reg_type = PTR_TO_FLOW_KEYS; break; case bpf_ctx_range(struct __sk_buff, tc_classid): diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c @@ -3800,6 +3800,9 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb, { int err; + if (dev->type != ARPHRD_ETHER) + return -EINVAL; + netif_addr_lock_bh(dev); err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc); if (err) diff --git a/net/dsa/master.c b/net/dsa/master.c @@ -158,8 +158,31 @@ static void dsa_master_ethtool_teardown(struct net_device *dev) cpu_dp->orig_ethtool_ops = NULL; } +static ssize_t tagging_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + struct net_device *dev = to_net_dev(d); + struct dsa_port *cpu_dp = dev->dsa_ptr; + + return sprintf(buf, "%s\n", + dsa_tag_protocol_to_str(cpu_dp->tag_ops)); +} +static DEVICE_ATTR_RO(tagging); + +static struct attribute *dsa_slave_attrs[] = { + &dev_attr_tagging.attr, + NULL +}; + +static const struct attribute_group dsa_group = { + .name = "dsa", + .attrs = dsa_slave_attrs, +}; + int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { + int ret; + /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get * sent to the tag format's receive function. @@ -168,11 +191,20 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) dev->dsa_ptr = cpu_dp; - return dsa_master_ethtool_setup(dev); + ret = dsa_master_ethtool_setup(dev); + if (ret) + return ret; + + ret = sysfs_create_group(&dev->dev.kobj, &dsa_group); + if (ret) + dsa_master_ethtool_teardown(dev); + + return ret; } void dsa_master_teardown(struct net_device *dev) { + sysfs_remove_group(&dev->dev.kobj, &dsa_group); dsa_master_ethtool_teardown(dev); dev->dsa_ptr = NULL; diff --git a/net/dsa/slave.c b/net/dsa/slave.c @@ -1058,27 +1058,6 @@ static struct device_type dsa_type = { .name = "dsa", }; -static ssize_t tagging_show(struct device *d, struct device_attribute *attr, - char *buf) -{ - struct net_device *dev = to_net_dev(d); - struct dsa_port *dp = dsa_slave_to_port(dev); - - return sprintf(buf, "%s\n", - dsa_tag_protocol_to_str(dp->cpu_dp->tag_ops)); -} -static DEVICE_ATTR_RO(tagging); - -static struct attribute *dsa_slave_attrs[] = { - &dev_attr_tagging.attr, - NULL -}; - -static const struct attribute_group dsa_group = { - .name = "dsa", - .attrs = dsa_slave_attrs, -}; - static void dsa_slave_phylink_validate(struct net_device *dev, unsigned long *supported, struct phylink_link_state *state) @@ -1374,14 +1353,8 @@ int dsa_slave_create(struct dsa_port *port) goto out_phy; } - ret = sysfs_create_group(&slave_dev->dev.kobj, &dsa_group); - if (ret) - goto out_unreg; - return 0; -out_unreg: - unregister_netdev(slave_dev); out_phy: rtnl_lock(); phylink_disconnect_phy(p->dp->pl); @@ -1405,7 +1378,6 @@ void dsa_slave_destroy(struct net_device *slave_dev) rtnl_unlock(); dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER); - sysfs_remove_group(&slave_dev->dev.kobj, &dsa_group); unregister_netdev(slave_dev); phylink_destroy(dp->pl); free_percpu(p->stats64); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c @@ -515,6 +515,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, struct rb_node *rbn; int len; int ihlen; + int delta; int err; u8 ecn; @@ -556,10 +557,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, if (len > 65535) goto out_oversize; + delta = - head->truesize; + /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) goto out_nomem; + delta += head->truesize; + if (delta) + add_frag_mem_limit(qp->q.net, delta); + /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c @@ -547,7 +547,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk, list_for_each_entry_safe(skb, next, head, list) { struct dst_entry *dst; - list_del(&skb->list); + skb_list_del_init(skb); /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing */ @@ -594,7 +594,7 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *dev = skb->dev; struct net *net = dev_net(dev); - list_del(&skb->list); + skb_list_del_init(skb); skb = ip_rcv_core(skb, net); if (skb == NULL) continue; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c @@ -1904,7 +1904,9 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue, * This algorithm is from John Heffner. */ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, - bool *is_cwnd_limited, u32 max_segs) + bool *is_cwnd_limited, + bool *is_rwnd_limited, + u32 max_segs) { const struct inet_connection_sock *icsk = inet_csk(sk); u32 age, send_win, cong_win, limit, in_flight; @@ -1912,9 +1914,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, struct sk_buff *head; int win_divisor; - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) - goto send_now; - if (icsk->icsk_ca_state >= TCP_CA_Recovery) goto send_now; @@ -1973,10 +1972,27 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, if (age < (tp->srtt_us >> 4)) goto send_now; - /* Ok, it looks like it is advisable to defer. */ + /* Ok, it looks like it is advisable to defer. + * Three cases are tracked : + * 1) We are cwnd-limited + * 2) We are rwnd-limited + * 3) We are application limited. + */ + if (cong_win < send_win) { + if (cong_win <= skb->len) { + *is_cwnd_limited = true; + return true; + } + } else { + if (send_win <= skb->len) { + *is_rwnd_limited = true; + return true; + } + } - if (cong_win < send_win && cong_win <= skb->len) - *is_cwnd_limited = true; + /* If this packet won't get more data, do not wait. */ + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + goto send_now; return true; @@ -2356,7 +2372,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } else { if (!push_one && tcp_tso_should_defer(sk, skb, &is_cwnd_limited, - max_segs)) + &is_rwnd_limited, max_segs)) break; } @@ -2494,15 +2510,18 @@ void tcp_send_loss_probe(struct sock *sk) goto rearm_timer; } skb = skb_rb_last(&sk->tcp_rtx_queue); + if (unlikely(!skb)) { + WARN_ONCE(tp->packets_out, + "invalid inflight: %u state %u cwnd %u mss %d\n", + tp->packets_out, sk->sk_state, tp->snd_cwnd, mss); + inet_csk(sk)->icsk_pending = 0; + return; + } /* At most one outstanding TLP retransmission. */ if (tp->tlp_high_seq) goto rearm_timer; - /* Retransmit last segment. */ - if (WARN_ON(!skb)) - goto rearm_timer; - if (skb_still_in_host_queue(sk, skb)) goto rearm_timer; @@ -2920,7 +2939,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; trace_tcp_retransmit_skb(sk, skb); } else if (err != -EBUSY) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); } return err; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c @@ -378,7 +378,7 @@ static void tcp_probe_timer(struct sock *sk) return; } - if (icsk->icsk_probes_out > max_probes) { + if (icsk->icsk_probes_out >= max_probes) { abort: tcp_write_err(sk); } else { /* Only send another probe if we didn't close things up. */ @@ -484,11 +484,12 @@ void tcp_retransmit_timer(struct sock *sk) goto out_reset_timer; } + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); if (tcp_write_timeout(sk)) goto out; if (icsk->icsk_retransmits == 0) { - int mib_idx; + int mib_idx = 0; if (icsk->icsk_ca_state == TCP_CA_Recovery) { if (tcp_is_sack(tp)) @@ -503,10 +504,9 @@ void tcp_retransmit_timer(struct sock *sk) mib_idx = LINUX_MIB_TCPSACKFAILURES; else mib_idx = LINUX_MIB_TCPRENOFAILURES; - } else { - mib_idx = LINUX_MIB_TCPTIMEOUTS; } - __NET_INC_STATS(sock_net(sk), mib_idx); + if (mib_idx) + __NET_INC_STATS(sock_net(sk), mib_idx); } tcp_enter_loss(sk); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c @@ -95,7 +95,7 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk, list_for_each_entry_safe(skb, next, head, list) { struct dst_entry *dst; - list_del(&skb->list); + skb_list_del_init(skb); /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing */ @@ -296,7 +296,7 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *dev = skb->dev; struct net *net = dev_net(dev); - list_del(&skb->list); + skb_list_del_init(skb); skb = ip6_rcv_core(skb, dev, net); if (skb == NULL) continue; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c @@ -195,37 +195,37 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); + unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; u32 mtu; - if (opt) { - unsigned int head_room; + head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); + if (opt) + head_room += opt->opt_nflen + opt->opt_flen; - /* First: exthdrs may take lots of space (~8K for now) - MAX_HEADER is not enough. - */ - head_room = opt->opt_nflen + opt->opt_flen; - seg_len += head_room; - head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); - - if (skb_headroom(skb) < head_room) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (!skb2) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); - return -ENOBUFS; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; + if (unlikely(skb_headroom(skb) < head_room)) { + struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); + if (!skb2) { + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return -ENOBUFS; } + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); + consume_skb(skb); + skb = skb2; + } + + if (opt) { + seg_len += opt->opt_nflen + opt->opt_flen; + if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); + if (opt->opt_nflen) ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, &fl6->saddr); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -341,7 +341,7 @@ static bool nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { struct sk_buff *fp, *head = fq->q.fragments; - int payload_len; + int payload_len, delta; u8 ecn; inet_frag_kill(&fq->q); @@ -363,10 +363,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic return false; } + delta = - head->truesize; + /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) return false; + delta += head->truesize; + if (delta) + add_frag_mem_limit(fq->q.net, delta); + /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c @@ -281,7 +281,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, { struct net *net = container_of(fq->q.net, struct net, ipv6.frags); struct sk_buff *fp, *head = fq->q.fragments; - int payload_len; + int payload_len, delta; unsigned int nhoff; int sum_truesize; u8 ecn; @@ -322,10 +322,16 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, if (payload_len > IPV6_MAXPLEN) goto out_oversize; + delta = - head->truesize; + /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; + delta += head->truesize; + if (delta) + add_frag_mem_limit(fq->q.net, delta); + /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c @@ -347,6 +347,7 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct ipv6hdr *hdr = ipv6_hdr(skb); struct flowi6 fl6; + memset(&fl6, 0, sizeof(fl6)); fl6.daddr = hdr->daddr; fl6.saddr = hdr->saddr; fl6.flowlabel = ip6_flowinfo(hdr); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c @@ -2891,7 +2891,7 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len + beacon->proberesp_ies_len + beacon->assocresp_ies_len + - beacon->probe_resp_len; + beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len; new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL); if (!new_beacon) @@ -2934,8 +2934,9 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) memcpy(pos, beacon->probe_resp, beacon->probe_resp_len); pos += beacon->probe_resp_len; } - if (beacon->ftm_responder) - new_beacon->ftm_responder = beacon->ftm_responder; + + /* might copy -1, meaning no changes requested */ + new_beacon->ftm_responder = beacon->ftm_responder; if (beacon->lci) { new_beacon->lci_len = beacon->lci_len; new_beacon->lci = pos; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c @@ -1015,6 +1015,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, if (local->open_count == 0) ieee80211_clear_tx_pending(local); + sdata->vif.bss_conf.beacon_int = 0; + /* * If the interface goes down while suspended, presumably because * the device was unplugged and that happens before our resume, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c @@ -2766,6 +2766,7 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct sta_info *sta; + bool result = true; sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; @@ -2778,15 +2779,18 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata, sta = sta_info_get(sdata, bssid); if (!sta) { WARN_ONCE(1, "%s: STA %pM not found", sdata->name, bssid); - return false; + result = false; + goto out; } if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) { sdata_info(sdata, "failed moving %pM to auth\n", bssid); - return false; + result = false; + goto out; } - mutex_unlock(&sdata->local->sta_mtx); - return true; +out: + mutex_unlock(&sdata->local->sta_mtx); + return result; } static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c @@ -1403,6 +1403,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (ieee80211_is_ctl(hdr->frame_control) || + ieee80211_is_nullfunc(hdr->frame_control) || ieee80211_is_qos_nullfunc(hdr->frame_control) || is_multicast_ether_addr(hdr->addr1)) return RX_CONTINUE; @@ -3063,7 +3064,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) cfg80211_sta_opmode_change_notify(sdata->dev, rx->sta->addr, &sta_opmode, - GFP_KERNEL); + GFP_ATOMIC); goto handled; } case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { @@ -3100,7 +3101,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) cfg80211_sta_opmode_change_notify(sdata->dev, rx->sta->addr, &sta_opmode, - GFP_KERNEL); + GFP_ATOMIC); goto handled; } default: diff --git a/net/mac80211/status.c b/net/mac80211/status.c @@ -964,6 +964,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, /* Track when last TDLS packet was ACKed */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) sta->status_stats.last_tdls_pkt_time = jiffies; + } else if (test_sta_flag(sta, WLAN_STA_PS_STA)) { + return; } else { ieee80211_lost_packet(sta, info); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c @@ -439,8 +439,8 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (ieee80211_hw_check(&tx->local->hw, QUEUE_CONTROL)) info->hw_queue = tx->sdata->vif.cab_queue; - /* no stations in PS mode */ - if (!atomic_read(&ps->num_sta_ps)) + /* no stations in PS mode and no buffered packets */ + if (!atomic_read(&ps->num_sta_ps) && skb_queue_empty(&ps->bc_buf)) return TX_CONTINUE; info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c @@ -1166,7 +1166,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); if (err) { net_warn_ratelimited("openvswitch: zone: %u " - "execeeds conntrack limit\n", + "exceeds conntrack limit\n", info->zone.id); return err; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c @@ -85,7 +85,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, int ovr, int bind, bool rtnl_held, struct netlink_ext_ack *extack) { - int ret = 0, err; + int ret = 0, tcfp_result = TC_ACT_OK, err, size; struct nlattr *tb[TCA_POLICE_MAX + 1]; struct tc_police *parm; struct tcf_police *police; @@ -93,7 +93,6 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, police_net_id); struct tcf_police_params *new; bool exists = false; - int size; if (nla == NULL) return -EINVAL; @@ -160,6 +159,16 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, goto failure; } + if (tb[TCA_POLICE_RESULT]) { + tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]); + if (TC_ACT_EXT_CMP(tcfp_result, TC_ACT_GOTO_CHAIN)) { + NL_SET_ERR_MSG(extack, + "goto chain not allowed on fallback"); + err = -EINVAL; + goto failure; + } + } + new = kzalloc(sizeof(*new), GFP_KERNEL); if (unlikely(!new)) { err = -ENOMEM; @@ -167,6 +176,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, } /* No failure allowed after this point */ + new->tcfp_result = tcfp_result; new->tcfp_mtu = parm->mtu; if (!new->tcfp_mtu) { new->tcfp_mtu = ~0; @@ -196,16 +206,6 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (tb[TCA_POLICE_AVRATE]) new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]); - if (tb[TCA_POLICE_RESULT]) { - new->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]); - if (TC_ACT_EXT_CMP(new->tcfp_result, TC_ACT_GOTO_CHAIN)) { - NL_SET_ERR_MSG(extack, - "goto chain not allowed on fallback"); - err = -EINVAL; - goto failure; - } - } - spin_lock_bh(&police->tcf_lock); spin_lock_bh(&police->tcfp_lock); police->tcfp_t_c = ktime_get_ns(); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c @@ -1238,18 +1238,16 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (err) goto errout_idr; - if (!tc_skip_sw(fnew->flags)) { - if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) { - err = -EEXIST; - goto errout_mask; - } - - err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node, - fnew->mask->filter_ht_params); - if (err) - goto errout_mask; + if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) { + err = -EEXIST; + goto errout_mask; } + err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node, + fnew->mask->filter_ht_params); + if (err) + goto errout_mask; + if (!tc_skip_hw(fnew->flags)) { err = fl_hw_replace_filter(tp, fnew, extack); if (err) @@ -1303,9 +1301,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f = arg; - if (!tc_skip_sw(f->flags)) - rhashtable_remove_fast(&f->mask->ht, &f->ht_node, - f->mask->filter_ht_params); + rhashtable_remove_fast(&f->mask->ht, &f->ht_node, + f->mask->filter_ht_params); __fl_delete(tp, f, extack); *last = list_empty(&head->masks); return 0; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c @@ -431,6 +431,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, int count = 1; int rc = NET_XMIT_SUCCESS; + /* Do not fool qdisc_drop_all() */ + skb->prev = NULL; + /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) ++count; diff --git a/net/sctp/associola.c b/net/sctp/associola.c @@ -118,9 +118,6 @@ static struct sctp_association *sctp_association_init( asoc->flowlabel = sp->flowlabel; asoc->dscp = sp->dscp; - /* Initialize default path MTU. */ - asoc->pathmtu = sp->pathmtu; - /* Set association default SACK delay */ asoc->sackdelay = msecs_to_jiffies(sp->sackdelay); asoc->sackfreq = sp->sackfreq; @@ -252,6 +249,10 @@ static struct sctp_association *sctp_association_init( 0, gfp)) goto fail_init; + /* Initialize default path MTU. */ + asoc->pathmtu = sp->pathmtu; + sctp_assoc_update_frag_point(asoc); + /* Assume that peer would support both address types unless we are * told otherwise. */ @@ -434,7 +435,7 @@ static void sctp_association_destroy(struct sctp_association *asoc) WARN_ON(atomic_read(&asoc->rmem_alloc)); - kfree(asoc); + kfree_rcu(asoc, rcu); SCTP_DBG_OBJCNT_DEC(assoc); } diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c @@ -191,6 +191,12 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, * the packet */ max_data = asoc->frag_point; + if (unlikely(!max_data)) { + max_data = sctp_min_frag_point(sctp_sk(asoc->base.sk), + sctp_datachk_len(&asoc->stream)); + pr_warn_ratelimited("%s: asoc:%p frag_point is zero, forcing max_data to default minimum (%Zu)", + __func__, asoc, max_data); + } /* If the the peer requested that we authenticate DATA chunks * we need to account for bundling of the AUTH chunks along with diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c @@ -2462,6 +2462,9 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, asoc->c.sinit_max_instreams, gfp)) goto clean_up; + /* Update frag_point when stream_interleave may get changed. */ + sctp_assoc_update_frag_point(asoc); + if (!asoc->temp && sctp_assoc_set_id(asoc, gfp)) goto clean_up; diff --git a/net/sctp/socket.c b/net/sctp/socket.c @@ -3324,8 +3324,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned __u16 datasize = asoc ? sctp_datachk_len(&asoc->stream) : sizeof(struct sctp_data_chunk); - min_len = sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, - datasize); + min_len = sctp_min_frag_point(sp, datasize); max_len = SCTP_MAX_CHUNK_LEN - datasize; if (val < min_len || val > max_len) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c @@ -272,11 +272,11 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, p1 = (u8*)(ht_capa); p2 = (u8*)(ht_capa_mask); - for (i = 0; i<sizeof(*ht_capa); i++) + for (i = 0; i < sizeof(*ht_capa); i++) p1[i] &= p2[i]; } -/* Do a logical ht_capa &= ht_capa_mask. */ +/* Do a logical vht_capa &= vht_capa_mask. */ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, const struct ieee80211_vht_cap *vht_capa_mask) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c @@ -7870,6 +7870,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) } memset(&params, 0, sizeof(params)); + params.beacon_csa.ftm_responder = -1; if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] || !info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]) diff --git a/net/wireless/sme.c b/net/wireless/sme.c @@ -642,11 +642,15 @@ static bool cfg80211_is_all_idle(void) * All devices must be idle as otherwise if you are actively * scanning some new beacon hints could be learned and would * count as new regulatory hints. + * Also if there is any other active beaconing interface we + * need not issue a disconnect hint and reset any info such + * as chan dfs state, etc. */ list_for_each_entry(rdev, &cfg80211_rdev_list, list) { list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { wdev_lock(wdev); - if (wdev->conn || wdev->current_bss) + if (wdev->conn || wdev->current_bss || + cfg80211_beaconing_iface_active(wdev)) is_all_idle = false; wdev_unlock(wdev); } @@ -1171,6 +1175,8 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, cfg80211_oper_and_ht_capa(&connect->ht_capa_mask, rdev->wiphy.ht_capa_mod_mask); + cfg80211_oper_and_vht_capa(&connect->vht_capa_mask, + rdev->wiphy.vht_capa_mod_mask); if (connkeys && connkeys->def >= 0) { int idx; diff --git a/net/wireless/util.c b/net/wireless/util.c @@ -1421,6 +1421,8 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, ies[pos + ext], ext == 2)) pos = skip_ie(ies, ielen, pos); + else + break; } } else { pos = skip_ie(ies, ielen, pos); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c @@ -100,7 +100,7 @@ int x25_parse_address_block(struct sk_buff *skb, } len = *skb->data; - needed = 1 + (len >> 4) + (len & 0x0f); + needed = 1 + ((len >> 4) + (len & 0x0f) + 1) / 2; if (!pskb_may_pull(skb, needed)) { /* packet is too short to hold the addresses it claims @@ -288,7 +288,7 @@ static struct sock *x25_find_listener(struct x25_address *addr, sk_for_each(s, &x25_list) if ((!strcmp(addr->x25_addr, x25_sk(s)->source_addr.x25_addr) || - !strcmp(addr->x25_addr, + !strcmp(x25_sk(s)->source_addr.x25_addr, null_x25_address.x25_addr)) && s->sk_state == TCP_LISTEN) { /* @@ -688,11 +688,15 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - len = strlen(addr->sx25_addr.x25_addr); - for (i = 0; i < len; i++) { - if (!isdigit(addr->sx25_addr.x25_addr[i])) { - rc = -EINVAL; - goto out; + /* check for the null_x25_address */ + if (strcmp(addr->sx25_addr.x25_addr, null_x25_address.x25_addr)) { + + len = strlen(addr->sx25_addr.x25_addr); + for (i = 0; i < len; i++) { + if (!isdigit(addr->sx25_addr.x25_addr[i])) { + rc = -EINVAL; + goto out; + } } } diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c @@ -142,6 +142,15 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp sk->sk_state_change(sk); break; } + case X25_CALL_REQUEST: + /* call collision */ + x25->causediag.cause = 0x01; + x25->causediag.diagnostic = 0x48; + + x25_write_internal(sk, X25_CLEAR_REQUEST); + x25_disconnect(sk, EISCONN, 0x01, 0x48); + break; + case X25_CLEAR_REQUEST: if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) goto out_clear; diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c @@ -32,7 +32,7 @@ static void btf_dumper_ptr(const void *data, json_writer_t *jw, } static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id, - const void *data) + __u8 bit_offset, const void *data) { int actual_type_id; @@ -40,7 +40,7 @@ static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id, if (actual_type_id < 0) return actual_type_id; - return btf_dumper_do_type(d, actual_type_id, 0, data); + return btf_dumper_do_type(d, actual_type_id, bit_offset, data); } static void btf_dumper_enum(const void *data, json_writer_t *jw) @@ -237,7 +237,7 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: - return btf_dumper_modifier(d, type_id, data); + return btf_dumper_modifier(d, type_id, bit_offset, data); default: jsonw_printf(d->jw, "(unsupported-kind"); return -EINVAL; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h @@ -2170,7 +2170,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, @@ -2187,12 +2187,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2201,8 +2203,10 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, the *struct bpf_sock* + * result is from reuse->socks[] using the hash of the tuple. * - * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, @@ -2219,12 +2223,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2233,6 +2239,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, the *struct bpf_sock* + * result is from reuse->socks[] using the hash of the tuple. * * int bpf_sk_release(struct bpf_sock *sk) * Description @@ -2405,6 +2413,9 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Current network namespace */ +#define BPF_F_CURRENT_NETNS (-1L) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, @@ -2422,6 +2433,12 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_SEG6_INLINE }; +#define __bpf_md_ptr(type, name) \ +union { \ + type name; \ + __u64 :64; \ +} __attribute__((aligned(8))) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -2456,7 +2473,7 @@ struct __sk_buff { /* ... here. */ __u32 data_meta; - struct bpf_flow_keys *flow_keys; + __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); }; struct bpf_tunnel_key { @@ -2572,8 +2589,8 @@ enum sk_action { * be added to the end of this structure */ struct sk_msg_md { - void *data; - void *data_end; + __bpf_md_ptr(void *, data); + __bpf_md_ptr(void *, data_end); __u32 family; __u32 remote_ip4; /* Stored in network byte order */ @@ -2589,8 +2606,9 @@ struct sk_reuseport_md { * Start of directly accessible data. It begins from * the tcp/udp header. */ - void *data; - void *data_end; /* End of directly accessible data */ + __bpf_md_ptr(void *, data); + /* End of directly accessible data */ + __bpf_md_ptr(void *, data_end); /* * Total length of packet (starting from the tcp/udp header). * Note that the directly accessible bytes (data_end - data) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h @@ -154,12 +154,12 @@ static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = (void *) BPF_FUNC_skb_ancestor_cgroup_id; static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, - int size, unsigned int netns_id, + int size, unsigned long long netns_id, unsigned long long flags) = (void *) BPF_FUNC_sk_lookup_tcp; static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, - int size, unsigned int netns_id, + int size, unsigned long long netns_id, unsigned long long flags) = (void *) BPF_FUNC_sk_lookup_udp; static int (*bpf_sk_release)(struct bpf_sock *sk) = diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c @@ -432,11 +432,11 @@ static struct btf_raw_test raw_tests[] = { /* const void* */ /* [3] */ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* typedef const void * const_void_ptr */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), - /* struct A { */ /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */ + /* struct A { */ /* [5] */ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), /* const_void_ptr m; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 0), + BTF_MEMBER_ENC(NAME_TBD, 4, 0), /* } */ BTF_END_RAW, }, @@ -494,10 +494,10 @@ static struct btf_raw_test raw_tests[] = { BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), /* const void* */ /* [3] */ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), - /* typedef const void * const_void_ptr */ /* [4] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), - /* const_void_ptr[4] */ /* [5] */ - BTF_TYPE_ARRAY_ENC(3, 1, 4), + /* typedef const void * const_void_ptr */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */ + /* const_void_ptr[4] */ + BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [5] */ BTF_END_RAW, }, .str_sec = "\0const_void_ptr", @@ -1293,6 +1293,367 @@ static struct btf_raw_test raw_tests[] = { }, { + .descr = "typedef (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(0, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "typedef (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__!int", + .str_sec_size = sizeof("\0__!int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "ptr type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "ptr_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "volatile type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "volatile_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "const type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "const_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "restrict type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), 2), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "restrict_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "fwd type (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__skb", + .str_sec_size = sizeof("\0__skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "fwd type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__!skb", + .str_sec_size = sizeof("\0__!skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "array type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), /* [2] */ + BTF_ARRAY_ENC(1, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0__skb", + .str_sec_size = sizeof("\0__skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "struct type (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A", + .str_sec_size = sizeof("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!\0B", + .str_sec_size = sizeof("\0A!\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "struct member (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A", + .str_sec_size = sizeof("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct member (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A\0B*", + .str_sec_size = sizeof("\0A\0B*"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum type (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A\0B", + .str_sec_size = sizeof("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "enum type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!\0B", + .str_sec_size = sizeof("\0A!\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum member (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(0, 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum member (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!", + .str_sec_size = sizeof("\0A!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, +{ .descr = "arraymap invalid btf key (a bit field)", .raw_types = { /* int */ /* [1] */ diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c @@ -72,7 +72,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb) return TC_ACT_SHOT; tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); - sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0); + sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); if (sk) bpf_sk_release(sk); return sk ? TC_ACT_OK : TC_ACT_UNSPEC; @@ -84,7 +84,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) bpf_sk_release(sk); return 0; @@ -97,7 +97,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family = 0; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) { bpf_sk_release(sk); family = sk->family; @@ -112,7 +112,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) { sk += 1; bpf_sk_release(sk); @@ -127,7 +127,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); sk += 1; if (sk) bpf_sk_release(sk); @@ -139,7 +139,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; - bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); return 0; } @@ -149,7 +149,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); bpf_sk_release(sk); bpf_sk_release(sk); return 0; @@ -161,7 +161,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); bpf_sk_release(sk); return 0; } @@ -169,7 +169,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) void lookup_no_release(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; - bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } SEC("fail_no_release_subcall") diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c @@ -8576,7 +8576,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JA, 0, 0, -7), }, .fixup_map_hash_8b = { 4 }, - .errstr = "R0 invalid mem access 'inv'", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -10547,7 +10547,7 @@ static struct bpf_test tests[] = { "check deducing bounds from const, 5", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, @@ -14230,7 +14230,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, reject_from_alignment = fd_prog < 0 && (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && - strstr(bpf_vlog, "Unknown alignment."); + strstr(bpf_vlog, "misaligned"); #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS if (reject_from_alignment) { printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n",