whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 0548740e53e6fe674f850d36db51eccb0557d938
parent 8e22ba96d44c4ad5f9970565c54ab1876448a5ca
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu,  4 Apr 2019 18:07:12 -1000

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) Several hash table refcount fixes in batman-adv, from Sven
    Eckelmann.

 2) Use after free in bpf_evict_inode(), from Daniel Borkmann.

 3) Fix mdio bus registration in ixgbe, from Ivan Vecera.

 4) Unbounded loop in __skb_try_recv_datagram(), from Paolo Abeni.

 5) ila rhashtable corruption fix from Herbert Xu.

 6) Don't allow upper-devices to be added to vrf devices, from Sabrina
    Dubroca.

 7) Add qmi_wwan device ID for Olicard 600, from Bjørn Mork.

 8) Don't leave skb->next poisoned in __netif_receive_skb_list_ptype,
    from Alexander Lobakin.

 9) Missing IDR checks in mlx5 driver, from Aditya Pakki.

10) Fix false connection termination in ktls, from Jakub Kicinski.

11) Work around some ASPM issues with r8169 by disabling rx interrupt
    coalescing on certain chips. From Heiner Kallweit.

12) Properly use per-cpu qstat values on NOLOCK qdiscs, from Paolo
    Abeni.

13) Fully initialize sockaddr_in structures in SCTP, from Xin Long.

14) Various BPF flow dissector fixes from Stanislav Fomichev.

15) Divide by zero in act_sample, from Davide Caratti.

16) Fix bridging multicast regression introduced by rhashtable
    conversion, from Nikolay Aleksandrov.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (106 commits)
  ibmvnic: Fix completion structure initialization
  ipv6: sit: reset ip header pointer in ipip6_rcv
  net: bridge: always clear mcast matching struct on reports and leaves
  libcxgb: fix incorrect ppmax calculation
  vlan: conditional inclusion of FCoE hooks to match netdevice.h and bnx2x
  sch_cake: Make sure we can write the IP header before changing DSCP bits
  sch_cake: Use tc_skb_protocol() helper for getting packet protocol
  tcp: Ensure DCTCP reacts to losses
  net/sched: act_sample: fix divide by zero in the traffic path
  net: thunderx: fix NULL pointer dereference in nicvf_open/nicvf_stop
  net: hns: Fix sparse: some warnings in HNS drivers
  net: hns: Fix WARNING when remove HNS driver with SMMU enabled
  net: hns: fix ICMP6 neighbor solicitation messages discard problem
  net: hns: Fix probabilistic memory overwrite when HNS driver initialized
  net: hns: Use NAPI_POLL_WEIGHT for hns driver
  net: hns: fix KASAN: use-after-free in hns_nic_net_xmit_hw()
  flow_dissector: rst'ify documentation
  ipv6: Fix dangling pointer when ipv6 fragment
  net-gro: Fix GRO flush when receiving a GSO packet.
  flow_dissector: document BPF flow dissector environment
  ...

Diffstat:
MDocumentation/bpf/btf.rst | 8++++----
ADocumentation/networking/bpf_flow_dissector.rst | 126+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
MDocumentation/networking/index.rst | 1+
MMAINTAINERS | 4++--
Mdrivers/net/bonding/bond_sysfs_slave.c | 4+++-
Mdrivers/net/dsa/mv88e6xxx/port.c | 24++++++++++++++++--------
Mdrivers/net/ethernet/cavium/thunder/nicvf_main.c | 20++++++++++++--------
Mdrivers/net/ethernet/cavium/thunder/nicvf_queues.c | 30++++++++++++++----------------
Mdrivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c | 9++++++++-
Mdrivers/net/ethernet/hisilicon/hns/hnae.c | 4+++-
Mdrivers/net/ethernet/hisilicon/hns/hnae.h | 2+-
Mdrivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 2+-
Mdrivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h | 4++--
Mdrivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 53+++++++++++++++++++++++++++++++++--------------------
Mdrivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h | 2++
Mdrivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 2+-
Mdrivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c | 6+++---
Mdrivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h | 4++--
Mdrivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 4++--
Mdrivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 12++++++------
Mdrivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c | 2+-
Mdrivers/net/ethernet/hisilicon/hns/hns_enet.c | 12+++++-------
Mdrivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile | 2+-
Mdrivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile | 2+-
Mdrivers/net/ethernet/hisilicon/hns_mdio.c | 18+++++++-----------
Mdrivers/net/ethernet/ibm/ibmvnic.c | 5+++--
Mdrivers/net/ethernet/intel/fm10k/fm10k_main.c | 2++
Mdrivers/net/ethernet/intel/i40e/i40e.h | 16++--------------
Mdrivers/net/ethernet/intel/i40e/i40e_ethtool.c | 3+--
Mdrivers/net/ethernet/intel/i40e/i40e_main.c | 28++++++++++++++++++++++++++++
Mdrivers/net/ethernet/intel/i40e/i40e_ptp.c | 5+++--
Mdrivers/net/ethernet/intel/i40e/i40e_xsk.c | 3+++
Mdrivers/net/ethernet/intel/igb/e1000_defines.h | 2++
Mdrivers/net/ethernet/intel/igb/igb_main.c | 57++++++++-------------------------------------------------
Mdrivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 16+++++++++-------
Mdrivers/net/ethernet/mellanox/mlx5/core/en/port.c | 3---
Mdrivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 39++++++++++++++++++++++-----------------
Mdrivers/net/ethernet/mellanox/mlx5/core/en_common.c | 13+++++++++++--
Mdrivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 52++++++++++++++++++++++++++++++++++------------------
Mdrivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Mdrivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 9+++++----
Mdrivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1+
Mdrivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 14+++++++++++---
Mdrivers/net/ethernet/mellanox/mlx5/core/main.c | 20--------------------
Mdrivers/net/ethernet/netronome/nfp/flower/action.c | 3+--
Mdrivers/net/ethernet/netronome/nfp/flower/cmsg.h | 3+--
Mdrivers/net/ethernet/netronome/nfp/flower/match.c | 27+++++++++++++--------------
Mdrivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 4++--
Mdrivers/net/ethernet/realtek/r8169.c | 2+-
Mdrivers/net/ethernet/stmicro/stmmac/descs_com.h | 22++++++++++++++--------
Mdrivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 2+-
Mdrivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 2+-
Mdrivers/net/ethernet/stmicro/stmmac/enh_desc.c | 22++++++++++++++++------
Mdrivers/net/ethernet/stmicro/stmmac/hwif.h | 2+-
Mdrivers/net/ethernet/stmicro/stmmac/norm_desc.c | 12+++++++-----
Mdrivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 34+++++++++++++++++++---------------
Mdrivers/net/hyperv/hyperv_net.h | 1+
Mdrivers/net/hyperv/netvsc.c | 6++++--
Mdrivers/net/hyperv/netvsc_drv.c | 32++++++++++++++++++++++++++------
Mdrivers/net/usb/qmi_wwan.c | 1+
Mdrivers/net/vrf.c | 1+
Minclude/linux/mii.h | 2+-
Minclude/linux/mlx5/driver.h | 2++
Minclude/net/ip.h | 2+-
Minclude/net/net_namespace.h | 1+
Minclude/net/netns/hash.h | 10++--------
Minclude/net/sch_generic.h | 44+++++++++++++++++++++++++++++++++++++-------
Mkernel/bpf/cpumap.c | 13++++++++++---
Mkernel/bpf/inode.c | 32++++++++++++++++++--------------
Mkernel/bpf/verifier.c | 5+++--
Mnet/8021q/vlan_dev.c | 26+++++++++++++++-----------
Mnet/batman-adv/bat_v_elp.c | 6++++--
Mnet/batman-adv/bridge_loop_avoidance.c | 16+++++++++++++---
Mnet/batman-adv/sysfs.c | 7+++++--
Mnet/batman-adv/translation-table.c | 32++++++++++++++++++++++++--------
Mnet/bridge/br_multicast.c | 3+++
Mnet/core/datagram.c | 2+-
Mnet/core/dev.c | 4+++-
Mnet/core/ethtool.c | 46++++++++++++++++++++++++++++++----------------
Mnet/core/filter.c | 16+++-------------
Mnet/core/flow_dissector.c | 4+++-
Mnet/core/net_namespace.c | 1+
Mnet/core/skbuff.c | 2+-
Mnet/dccp/feat.c | 7++++++-
Mnet/dsa/tag_qca.c | 10++++++++++
Mnet/ipv4/ip_input.c | 7+++----
Mnet/ipv4/ip_options.c | 4++--
Mnet/ipv4/tcp_dctcp.c | 36++++++++++++++++++------------------
Mnet/ipv4/tcp_ipv4.c | 3++-
Mnet/ipv6/ila/ila_xlat.c | 1+
Mnet/ipv6/ip6_output.c | 4+++-
Mnet/ipv6/ip6_tunnel.c | 4++--
Mnet/ipv6/sit.c | 4++++
Mnet/kcm/kcmsock.c | 16++++++++--------
Mnet/openvswitch/flow_netlink.c | 4++--
Mnet/rds/tcp.c | 2+-
Mnet/sched/act_sample.c | 10++++++++--
Mnet/sched/cls_matchall.c | 5+++++
Mnet/sched/sch_cake.c | 13++++++++++++-
Mnet/sched/sch_cbq.c | 10++++------
Mnet/sched/sch_drr.c | 16++++------------
Mnet/sched/sch_hfsc.c | 19+++++--------------
Mnet/sched/sch_htb.c | 22++++++----------------
Mnet/sched/sch_mq.c | 2+-
Mnet/sched/sch_mqprio.c | 3+--
Mnet/sched/sch_multiq.c | 10++++------
Mnet/sched/sch_prio.c | 10+++-------
Mnet/sched/sch_qfq.c | 14++------------
Mnet/sched/sch_red.c | 3+--
Mnet/sched/sch_sfb.c | 3+--
Mnet/sched/sch_taprio.c | 2+-
Mnet/sched/sch_tbf.c | 3+--
Mnet/sctp/protocol.c | 1+
Mnet/tipc/netlink_compat.c | 24++++++++++++++++++++----
Mnet/tls/tls_sw.c | 2++
Mtools/lib/bpf/Makefile | 7++++---
Mtools/lib/bpf/btf.c | 3+++
Mtools/testing/selftests/bpf/prog_tests/flow_dissector.c | 68++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtools/testing/selftests/bpf/progs/bpf_flow.c | 19+++++++------------
Mtools/testing/selftests/bpf/test_btf.c | 47+++++++++++++++++++++++++++++++++++++++++++++++
Mtools/testing/selftests/bpf/verifier/calls.c | 38++++++++++++++++++++++++++++++++++++++
Mtools/testing/selftests/tc-testing/tc-tests/actions/sample.json | 24++++++++++++++++++++++++
122 files changed, 1096 insertions(+), 563 deletions(-)

diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst @@ -148,16 +148,16 @@ The ``btf_type.size * 8`` must be equal to or greater than ``BTF_INT_BITS()`` for the type. The maximum value of ``BTF_INT_BITS()`` is 128. The ``BTF_INT_OFFSET()`` specifies the starting bit offset to calculate values -for this int. For example, a bitfield struct member has: * btf member bit -offset 100 from the start of the structure, * btf member pointing to an int -type, * the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4`` +for this int. For example, a bitfield struct member has: + * btf member bit offset 100 from the start of the structure, + * btf member pointing to an int type, + * the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4`` Then in the struct memory layout, this member will occupy ``4`` bits starting from bits ``100 + 2 = 102``. Alternatively, the bitfield struct member can be the following to access the same bits as the above: - * btf member bit offset 102, * btf member pointing to an int type, * the int type has ``BTF_INT_OFFSET() = 0`` and ``BTF_INT_BITS() = 4`` diff --git a/Documentation/networking/bpf_flow_dissector.rst b/Documentation/networking/bpf_flow_dissector.rst @@ -0,0 +1,126 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================== +BPF Flow Dissector +================== + +Overview +======== + +Flow dissector is a routine that parses metadata out of the packets. It's +used in the various places in the networking subsystem (RFS, flow hash, etc). + +BPF flow dissector is an attempt to reimplement C-based flow dissector logic +in BPF to gain all the benefits of BPF verifier (namely, limits on the +number of instructions and tail calls). + +API +=== + +BPF flow dissector programs operate on an ``__sk_buff``. However, only the +limited set of fields is allowed: ``data``, ``data_end`` and ``flow_keys``. +``flow_keys`` is ``struct bpf_flow_keys`` and contains flow dissector input +and output arguments. + +The inputs are: + * ``nhoff`` - initial offset of the networking header + * ``thoff`` - initial offset of the transport header, initialized to nhoff + * ``n_proto`` - L3 protocol type, parsed out of L2 header + +Flow dissector BPF program should fill out the rest of the ``struct +bpf_flow_keys`` fields. Input arguments ``nhoff/thoff/n_proto`` should be +also adjusted accordingly. + +The return code of the BPF program is either BPF_OK to indicate successful +dissection, or BPF_DROP to indicate parsing error. + +__sk_buff->data +=============== + +In the VLAN-less case, this is what the initial state of the BPF flow +dissector looks like:: + + +------+------+------------+-----------+ + | DMAC | SMAC | ETHER_TYPE | L3_HEADER | + +------+------+------------+-----------+ + ^ + | + +-- flow dissector starts here + + +.. code:: c + + skb->data + flow_keys->nhoff point to the first byte of L3_HEADER + flow_keys->thoff = nhoff + flow_keys->n_proto = ETHER_TYPE + +In case of VLAN, flow dissector can be called with the two different states. + +Pre-VLAN parsing:: + + +------+------+------+-----+-----------+-----------+ + | DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER | + +------+------+------+-----+-----------+-----------+ + ^ + | + +-- flow dissector starts here + +.. code:: c + + skb->data + flow_keys->nhoff point the to first byte of TCI + flow_keys->thoff = nhoff + flow_keys->n_proto = TPID + +Please note that TPID can be 802.1AD and, hence, BPF program would +have to parse VLAN information twice for double tagged packets. + + +Post-VLAN parsing:: + + +------+------+------+-----+-----------+-----------+ + | DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER | + +------+------+------+-----+-----------+-----------+ + ^ + | + +-- flow dissector starts here + +.. code:: c + + skb->data + flow_keys->nhoff point the to first byte of L3_HEADER + flow_keys->thoff = nhoff + flow_keys->n_proto = ETHER_TYPE + +In this case VLAN information has been processed before the flow dissector +and BPF flow dissector is not required to handle it. + + +The takeaway here is as follows: BPF flow dissector program can be called with +the optional VLAN header and should gracefully handle both cases: when single +or double VLAN is present and when it is not present. The same program +can be called for both cases and would have to be written carefully to +handle both cases. + + +Reference Implementation +======================== + +See ``tools/testing/selftests/bpf/progs/bpf_flow.c`` for the reference +implementation and ``tools/testing/selftests/bpf/flow_dissector_load.[hc]`` +for the loader. bpftool can be used to load BPF flow dissector program as well. + +The reference implementation is organized as follows: + * ``jmp_table`` map that contains sub-programs for each supported L3 protocol + * ``_dissect`` routine - entry point; it does input ``n_proto`` parsing and + does ``bpf_tail_call`` to the appropriate L3 handler + +Since BPF at this point doesn't support looping (or any jumping back), +jmp_table is used instead to handle multiple levels of encapsulation (and +IPv6 options). + + +Current Limitations +=================== +BPF flow dissector doesn't support exporting all the metadata that in-kernel +C-based implementation can export. Notable example is single VLAN (802.1Q) +and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys`` +for a set of information that's currently can be exported from the BPF context. diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst @@ -9,6 +9,7 @@ Contents: netdev-FAQ af_xdp batman-adv + bpf_flow_dissector can can_ucan_protocol device_drivers/freescale/dpaa2/index diff --git a/MAINTAINERS b/MAINTAINERS @@ -5833,7 +5833,7 @@ L: netdev@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-bus-mdio F: Documentation/devicetree/bindings/net/mdio* -F: Documentation/networking/phy.txt +F: Documentation/networking/phy.rst F: drivers/net/phy/ F: drivers/of/of_mdio.c F: drivers/of/of_net.c @@ -13981,7 +13981,7 @@ F: drivers/media/rc/serial_ir.c SFC NETWORK DRIVER M: Solarflare linux maintainers <linux-net-drivers@solarflare.com> M: Edward Cree <ecree@solarflare.com> -M: Bert Kenward <bkenward@solarflare.com> +M: Martin Habets <mhabets@solarflare.com> L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/sfc/ diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c @@ -55,7 +55,9 @@ static SLAVE_ATTR_RO(link_failure_count); static ssize_t perm_hwaddr_show(struct slave *slave, char *buf) { - return sprintf(buf, "%pM\n", slave->perm_hwaddr); + return sprintf(buf, "%*phC\n", + slave->dev->addr_len, + slave->perm_hwaddr); } static SLAVE_ATTR_RO(perm_hwaddr); diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c @@ -427,18 +427,22 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, return 0; lane = mv88e6390x_serdes_get_lane(chip, port); - if (lane < 0) + if (lane < 0 && lane != -ENODEV) return lane; - if (chip->ports[port].serdes_irq) { - err = mv88e6390_serdes_irq_disable(chip, port, lane); + if (lane >= 0) { + if (chip->ports[port].serdes_irq) { + err = mv88e6390_serdes_irq_disable(chip, port, lane); + if (err) + return err; + } + + err = mv88e6390x_serdes_power(chip, port, false); if (err) return err; } - err = mv88e6390x_serdes_power(chip, port, false); - if (err) - return err; + chip->ports[port].cmode = 0; if (cmode) { err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &reg); @@ -452,6 +456,12 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, if (err) return err; + chip->ports[port].cmode = cmode; + + lane = mv88e6390x_serdes_get_lane(chip, port); + if (lane < 0) + return lane; + err = mv88e6390x_serdes_power(chip, port, true); if (err) return err; @@ -463,8 +473,6 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, } } - chip->ports[port].cmode = cmode; - return 0; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1328,10 +1328,11 @@ int nicvf_stop(struct net_device *netdev) struct nicvf_cq_poll *cq_poll = NULL; union nic_mbx mbx = {}; - cancel_delayed_work_sync(&nic->link_change_work); - /* wait till all queued set_rx_mode tasks completes */ - drain_workqueue(nic->nicvf_rx_mode_wq); + if (nic->nicvf_rx_mode_wq) { + cancel_delayed_work_sync(&nic->link_change_work); + drain_workqueue(nic->nicvf_rx_mode_wq); + } mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN; nicvf_send_msg_to_pf(nic, &mbx); @@ -1452,7 +1453,8 @@ int nicvf_open(struct net_device *netdev) struct nicvf_cq_poll *cq_poll = NULL; /* wait till all queued set_rx_mode tasks completes if any */ - drain_workqueue(nic->nicvf_rx_mode_wq); + if (nic->nicvf_rx_mode_wq) + drain_workqueue(nic->nicvf_rx_mode_wq); netif_carrier_off(netdev); @@ -1550,10 +1552,12 @@ int nicvf_open(struct net_device *netdev) /* Send VF config done msg to PF */ nicvf_send_cfg_done(nic); - INIT_DELAYED_WORK(&nic->link_change_work, - nicvf_link_status_check_task); - queue_delayed_work(nic->nicvf_rx_mode_wq, - &nic->link_change_work, 0); + if (nic->nicvf_rx_mode_wq) { + INIT_DELAYED_WORK(&nic->link_change_work, + nicvf_link_status_check_task); + queue_delayed_work(nic->nicvf_rx_mode_wq, + &nic->link_change_work, 0); + } return 0; cleanup: diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -105,20 +105,19 @@ static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic, /* Check if page can be recycled */ if (page) { ref_count = page_ref_count(page); - /* Check if this page has been used once i.e 'put_page' - * called after packet transmission i.e internal ref_count - * and page's ref_count are equal i.e page can be recycled. + /* This page can be recycled if internal ref_count and page's + * ref_count are equal, indicating that the page has been used + * once for packet transmission. For non-XDP mode, internal + * ref_count is always '1'. */ - if (rbdr->is_xdp && (ref_count == pgcache->ref_count)) - pgcache->ref_count--; - else - page = NULL; - - /* In non-XDP mode, page's ref_count needs to be '1' for it - * to be recycled. - */ - if (!rbdr->is_xdp && (ref_count != 1)) + if (rbdr->is_xdp) { + if (ref_count == pgcache->ref_count) + pgcache->ref_count--; + else + page = NULL; + } else if (ref_count != 1) { page = NULL; + } } if (!page) { @@ -365,11 +364,10 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) while (head < rbdr->pgcnt) { pgcache = &rbdr->pgcache[head]; if (pgcache->page && page_ref_count(pgcache->page) != 0) { - if (!rbdr->is_xdp) { - put_page(pgcache->page); - continue; + if (rbdr->is_xdp) { + page_ref_sub(pgcache->page, + pgcache->ref_count - 1); } - page_ref_sub(pgcache->page, pgcache->ref_count - 1); put_page(pgcache->page); } head++; diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c @@ -354,7 +354,10 @@ static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total, ppmax = max; /* pool size must be multiple of unsigned long */ - bmap = BITS_TO_LONGS(ppmax); + bmap = ppmax / BITS_PER_TYPE(unsigned long); + if (!bmap) + return NULL; + ppmax = (bmap * sizeof(unsigned long)) << 3; alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap; @@ -402,6 +405,10 @@ int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, if (reserve_factor) { ppmax_pool = ppmax / reserve_factor; pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max); + if (!pool) { + ppmax_pool = 0; + reserve_factor = 0; + } pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n", ndev->name, ppmax, ppmax_pool, pool_index_max); diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -150,7 +150,6 @@ out_buffer_fail: /* free desc along with its attached buffer */ static void hnae_free_desc(struct hnae_ring *ring) { - hnae_free_buffers(ring); dma_unmap_single(ring_to_dev(ring), ring->desc_dma_addr, ring->desc_num * sizeof(ring->desc[0]), ring_to_dma_dir(ring)); @@ -183,6 +182,9 @@ static int hnae_alloc_desc(struct hnae_ring *ring) /* fini ring, also free the buffer for the ring */ static void hnae_fini_ring(struct hnae_ring *ring) { + if (is_rx_ring(ring)) + hnae_free_buffers(ring); + hnae_free_desc(ring); kfree(ring->desc_cb); ring->desc_cb = NULL; diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -357,7 +357,7 @@ struct hnae_buf_ops { }; struct hnae_queue { - void __iomem *io_base; + u8 __iomem *io_base; phys_addr_t phy_base; struct hnae_ae_dev *dev; /* the device who use this queue */ struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -370,7 +370,7 @@ int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn) static void hns_mac_param_get(struct mac_params *param, struct hns_mac_cb *mac_cb) { - param->vaddr = (void *)mac_cb->vaddr; + param->vaddr = mac_cb->vaddr; param->mac_mode = hns_get_enet_interface(mac_cb); ether_addr_copy(param->addr, mac_cb->addr_entry_idx[0].addr); param->mac_id = mac_cb->mac_id; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -187,7 +187,7 @@ struct mac_statistics { /*mac para struct ,mac get param from nic or dsaf when initialize*/ struct mac_params { char addr[ETH_ALEN]; - void *vaddr; /*virtual address*/ + u8 __iomem *vaddr; /*virtual address*/ struct device *dev; u8 mac_id; /**< Ethernet operation mode (MAC-PHY interface and speed) */ @@ -402,7 +402,7 @@ struct mac_driver { enum mac_mode mac_mode; u8 mac_id; struct hns_mac_cb *mac_cb; - void __iomem *io_base; + u8 __iomem *io_base; unsigned int mac_en_flg;/*you'd better don't enable mac twice*/ unsigned int virt_dev_num; struct device *dev; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -1602,8 +1602,6 @@ static void hns_dsaf_set_mac_key( DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id); dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M, DSAF_TBL_TCAM_KEY_PORT_S, port); - - mac_key->low.bits.port_vlan = le16_to_cpu(mac_key->low.bits.port_vlan); } /** @@ -1663,8 +1661,8 @@ int hns_dsaf_set_mac_uc_entry( /* default config dvc to 0 */ mac_data.tbl_ucast_dvc = 0; mac_data.tbl_ucast_out_port = mac_entry->port_num; - tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); - tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + tcam_data.tbl_tcam_data_high = mac_key.high.val; + tcam_data.tbl_tcam_data_low = mac_key.low.val; hns_dsaf_tcam_uc_cfg(dsaf_dev, entry_index, &tcam_data, &mac_data); @@ -1786,9 +1784,6 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, 0xff, mc_mask); - mask_key.high.val = le32_to_cpu(mask_key.high.val); - mask_key.low.val = le32_to_cpu(mask_key.low.val); - pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key); } @@ -1840,8 +1835,8 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, dsaf_dev->ae_dev.name, mac_key.high.val, mac_key.low.val, entry_index); - tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); - tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + tcam_data.tbl_tcam_data_high = mac_key.high.val; + tcam_data.tbl_tcam_data_low = mac_key.low.val; /* config mc entry with mask */ hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, @@ -1956,9 +1951,6 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev, /* config key mask */ hns_dsaf_set_mac_key(dsaf_dev, &mask_key, 0x00, 0xff, mc_mask); - mask_key.high.val = le32_to_cpu(mask_key.high.val); - mask_key.low.val = le32_to_cpu(mask_key.low.val); - pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key); } @@ -2012,8 +2004,8 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev, soft_mac_entry += entry_index; soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX; } else { /* not zero, just del port, update */ - tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); - tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + tcam_data.tbl_tcam_data_high = mac_key.high.val; + tcam_data.tbl_tcam_data_low = mac_key.low.val; hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, @@ -2750,6 +2742,17 @@ int hns_dsaf_get_regs_count(void) return DSAF_DUMP_REGS_NUM; } +static int hns_dsaf_get_port_id(u8 port) +{ + if (port < DSAF_SERVICE_NW_NUM) + return port; + + if (port >= DSAF_BASE_INNER_PORT_NUM) + return port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM; + + return -EINVAL; +} + static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port) { struct dsaf_tbl_tcam_ucast_cfg tbl_tcam_ucast = {0, 1, 0, 0, 0x80}; @@ -2815,23 +2818,33 @@ static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port) memset(&temp_key, 0x0, sizeof(temp_key)); mask_entry.addr[0] = 0x01; hns_dsaf_set_mac_key(dsaf_dev, &mask_key, mask_entry.in_vlan_id, - port, mask_entry.addr); + 0xf, mask_entry.addr); tbl_tcam_mcast.tbl_mcast_item_vld = 1; tbl_tcam_mcast.tbl_mcast_old_en = 0; - if (port < DSAF_SERVICE_NW_NUM) { - mskid = port; - } else if (port >= DSAF_BASE_INNER_PORT_NUM) { - mskid = port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM; - } else { + /* set MAC port to handle multicast */ + mskid = hns_dsaf_get_port_id(port); + if (mskid == -EINVAL) { dev_err(dsaf_dev->dev, "%s,pnum(%d)error,key(%#x:%#x)\n", dsaf_dev->ae_dev.name, port, mask_key.high.val, mask_key.low.val); return; } + dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32], + mskid % 32, 1); + /* set pool bit map to handle multicast */ + mskid = hns_dsaf_get_port_id(port_num); + if (mskid == -EINVAL) { + dev_err(dsaf_dev->dev, + "%s, pool bit map pnum(%d)error,key(%#x:%#x)\n", + dsaf_dev->ae_dev.name, port_num, + mask_key.high.val, mask_key.low.val); + return; + } dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32], mskid % 32, 1); + memcpy(&temp_key, &mask_key, sizeof(mask_key)); hns_dsaf_tcam_mc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_mc, (struct dsaf_tbl_tcam_data *)(&mask_key), diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -467,4 +467,6 @@ int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, u8 mac_id, u8 port_num); int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port); +int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset); + #endif /* __HNS_DSAF_MAIN_H__ */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -670,7 +670,7 @@ static int hns_mac_config_sds_loopback(struct hns_mac_cb *mac_cb, bool en) dsaf_set_field(origin, 1ull << 10, 10, en); dsaf_write_syscon(mac_cb->serdes_ctrl, reg_offset, origin); } else { - u8 *base_addr = (u8 *)mac_cb->serdes_vaddr + + u8 __iomem *base_addr = mac_cb->serdes_vaddr + (mac_cb->mac_id <= 3 ? 0x00280000 : 0x00200000); dsaf_set_reg_field(base_addr, reg_offset, 1ull << 10, 10, en); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -61,7 +61,7 @@ void hns_ppe_set_indir_table(struct hns_ppe_cb *ppe_cb, } } -static void __iomem * +static u8 __iomem * hns_ppe_common_get_ioaddr(struct ppe_common_cb *ppe_common) { return ppe_common->dsaf_dev->ppe_base + PPE_COMMON_REG_OFFSET; @@ -111,8 +111,8 @@ hns_ppe_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index) dsaf_dev->ppe_common[comm_index] = NULL; } -static void __iomem *hns_ppe_get_iobase(struct ppe_common_cb *ppe_common, - int ppe_idx) +static u8 __iomem *hns_ppe_get_iobase(struct ppe_common_cb *ppe_common, + int ppe_idx) { return ppe_common->dsaf_dev->ppe_base + ppe_idx * PPE_REG_OFFSET; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h @@ -80,7 +80,7 @@ struct hns_ppe_cb { struct hns_ppe_hw_stats hw_stats; u8 index; /* index in a ppe common device */ - void __iomem *io_base; + u8 __iomem *io_base; int virq; u32 rss_indir_table[HNS_PPEV2_RSS_IND_TBL_SIZE]; /*shadow indir tab */ u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]; /* rss hash key */ @@ -89,7 +89,7 @@ struct hns_ppe_cb { struct ppe_common_cb { struct device *dev; struct dsaf_device *dsaf_dev; - void __iomem *io_base; + u8 __iomem *io_base; enum ppe_common_mode ppe_mode; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -458,7 +458,7 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type) mdnum_ppkt = HNS_RCB_RING_MAX_BD_PER_PKT; } else { ring = &q->tx_ring; - ring->io_base = (u8 __iomem *)ring_pair_cb->q.io_base + + ring->io_base = ring_pair_cb->q.io_base + HNS_RCB_TX_REG_OFFSET; irq_idx = HNS_RCB_IRQ_IDX_TX; mdnum_ppkt = is_ver1 ? HNS_RCB_RING_MAX_TXBD_PER_PKT : @@ -764,7 +764,7 @@ static int hns_rcb_get_ring_num(struct dsaf_device *dsaf_dev) } } -static void __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common) +static u8 __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common) { struct dsaf_device *dsaf_dev = rcb_common->dsaf_dev; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -1018,7 +1018,7 @@ #define XGMAC_PAUSE_CTL_RSP_MODE_B 2 #define XGMAC_PAUSE_CTL_TX_XOFF_B 3 -static inline void dsaf_write_reg(void __iomem *base, u32 reg, u32 value) +static inline void dsaf_write_reg(u8 __iomem *base, u32 reg, u32 value) { writel(value, base + reg); } @@ -1053,7 +1053,7 @@ static inline int dsaf_read_syscon(struct regmap *base, u32 reg, u32 *val) #define dsaf_set_bit(origin, shift, val) \ dsaf_set_field((origin), (1ull << (shift)), (shift), (val)) -static inline void dsaf_set_reg_field(void __iomem *base, u32 reg, u32 mask, +static inline void dsaf_set_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift, u32 val) { u32 origin = dsaf_read_reg(base, reg); @@ -1073,7 +1073,7 @@ static inline void dsaf_set_reg_field(void __iomem *base, u32 reg, u32 mask, #define dsaf_get_bit(origin, shift) \ dsaf_get_field((origin), (1ull << (shift)), (shift)) -static inline u32 dsaf_get_reg_field(void __iomem *base, u32 reg, u32 mask, +static inline u32 dsaf_get_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift) { u32 origin; @@ -1089,11 +1089,11 @@ static inline u32 dsaf_get_reg_field(void __iomem *base, u32 reg, u32 mask, dsaf_get_reg_field((dev)->io_base, (reg), (1ull << (bit)), (bit)) #define dsaf_write_b(addr, data)\ - writeb((data), (__iomem unsigned char *)(addr)) + writeb((data), (__iomem u8 *)(addr)) #define dsaf_read_b(addr)\ - readb((__iomem unsigned char *)(addr)) + readb((__iomem u8 *)(addr)) #define hns_mac_reg_read64(drv, offset) \ - readq((__iomem void *)(((u8 *)(drv)->io_base + 0xc00 + (offset)))) + readq((__iomem void *)(((drv)->io_base + 0xc00 + (offset)))) #endif /* _DSAF_REG_H */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c @@ -129,7 +129,7 @@ static void hns_xgmac_lf_rf_control_init(struct mac_driver *mac_drv) dsaf_set_bit(val, XGMAC_UNIDIR_EN_B, 0); dsaf_set_bit(val, XGMAC_RF_TX_EN_B, 1); dsaf_set_field(val, XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, 0); - dsaf_write_reg(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val); + dsaf_write_dev(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val); } /** diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -29,9 +29,6 @@ #define SERVICE_TIMER_HZ (1 * HZ) -#define NIC_TX_CLEAN_MAX_NUM 256 -#define NIC_RX_CLEAN_MAX_NUM 64 - #define RCB_IRQ_NOT_INITED 0 #define RCB_IRQ_INITED 1 #define HNS_BUFFER_SIZE_2048 2048 @@ -376,8 +373,6 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, wmb(); /* commit all data before submit */ assert(skb->queue_mapping < priv->ae_handle->q_num); hnae_queue_xmit(priv->ae_handle->qs[skb->queue_mapping], buf_num); - ring->stats.tx_pkts++; - ring->stats.tx_bytes += skb->len; return NETDEV_TX_OK; @@ -999,6 +994,9 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, /* issue prefetch for next Tx descriptor */ prefetch(&ring->desc_cb[ring->next_to_clean]); } + /* update tx ring statistics. */ + ring->stats.tx_pkts += pkts; + ring->stats.tx_bytes += bytes; NETIF_TX_UNLOCK(ring); @@ -2152,7 +2150,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv) hns_nic_tx_fini_pro_v2; netif_napi_add(priv->netdev, &rd->napi, - hns_nic_common_poll, NIC_TX_CLEAN_MAX_NUM); + hns_nic_common_poll, NAPI_POLL_WEIGHT); rd->ring->irq_init_flag = RCB_IRQ_NOT_INITED; } for (i = h->q_num; i < h->q_num * 2; i++) { @@ -2165,7 +2163,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv) hns_nic_rx_fini_pro_v2; netif_napi_add(priv->netdev, &rd->napi, - hns_nic_common_poll, NIC_RX_CLEAN_MAX_NUM); + hns_nic_common_poll, NAPI_POLL_WEIGHT); rd->ring->irq_init_flag = RCB_IRQ_NOT_INITED; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile @@ -3,7 +3,7 @@ # Makefile for the HISILICON network device drivers. # -ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 +ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGE) += hclge.o hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge_debugfs.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile @@ -3,7 +3,7 @@ # Makefile for the HISILICON network device drivers. # -ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 +ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o \ No newline at end of file diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -39,7 +39,7 @@ struct hns_mdio_sc_reg { }; struct hns_mdio_device { - void *vbase; /* mdio reg base address */ + u8 __iomem *vbase; /* mdio reg base address */ struct regmap *subctrl_vbase; struct hns_mdio_sc_reg sc_reg; }; @@ -96,21 +96,17 @@ enum mdio_c45_op_seq { #define MDIO_SC_CLK_ST 0x531C #define MDIO_SC_RESET_ST 0x5A1C -static void mdio_write_reg(void *base, u32 reg, u32 value) +static void mdio_write_reg(u8 __iomem *base, u32 reg, u32 value) { - u8 __iomem *reg_addr = (u8 __iomem *)base; - - writel_relaxed(value, reg_addr + reg); + writel_relaxed(value, base + reg); } #define MDIO_WRITE_REG(a, reg, value) \ mdio_write_reg((a)->vbase, (reg), (value)) -static u32 mdio_read_reg(void *base, u32 reg) +static u32 mdio_read_reg(u8 __iomem *base, u32 reg) { - u8 __iomem *reg_addr = (u8 __iomem *)base; - - return readl_relaxed(reg_addr + reg); + return readl_relaxed(base + reg); } #define mdio_set_field(origin, mask, shift, val) \ @@ -121,7 +117,7 @@ static u32 mdio_read_reg(void *base, u32 reg) #define mdio_get_field(origin, mask, shift) (((origin) >> (shift)) & (mask)) -static void mdio_set_reg_field(void *base, u32 reg, u32 mask, u32 shift, +static void mdio_set_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift, u32 val) { u32 origin = mdio_read_reg(base, reg); @@ -133,7 +129,7 @@ static void mdio_set_reg_field(void *base, u32 reg, u32 mask, u32 shift, #define MDIO_SET_REG_FIELD(dev, reg, mask, shift, val) \ mdio_set_reg_field((dev)->vbase, (reg), (mask), (shift), (val)) -static u32 mdio_get_reg_field(void *base, u32 reg, u32 mask, u32 shift) +static u32 mdio_get_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift) { u32 origin; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1885,6 +1885,7 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, */ adapter->state = VNIC_PROBED; + reinit_completion(&adapter->init_done); rc = init_crq_queue(adapter); if (rc) { netdev_err(adapter->netdev, @@ -4625,7 +4626,7 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) old_num_rx_queues = adapter->req_rx_queues; old_num_tx_queues = adapter->req_tx_queues; - init_completion(&adapter->init_done); + reinit_completion(&adapter->init_done); adapter->init_done_rc = 0; ibmvnic_send_crq_init(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { @@ -4680,7 +4681,6 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) adapter->from_passive_init = false; - init_completion(&adapter->init_done); adapter->init_done_rc = 0; ibmvnic_send_crq_init(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { @@ -4759,6 +4759,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset); INIT_LIST_HEAD(&adapter->rwi_list); spin_lock_init(&adapter->rwi_lock); + init_completion(&adapter->init_done); adapter->resetting = false; adapter->mac_change_pending = false; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -41,6 +41,8 @@ static int __init fm10k_init_module(void) /* create driver workqueue */ fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, fm10k_driver_name); + if (!fm10k_workqueue) + return -ENOMEM; fm10k_dbg_init(); diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h @@ -790,6 +790,8 @@ struct i40e_vsi { /* VSI specific handlers */ irqreturn_t (*irq_handler)(int irq, void *data); + + unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */ } ____cacheline_internodealigned_in_smp; struct i40e_netdev_priv { @@ -1096,20 +1098,6 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) return !!vsi->xdp_prog; } -static inline struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring) -{ - bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi); - int qid = ring->queue_index; - - if (ring_is_xdp(ring)) - qid -= ring->vsi->alloc_queue_pairs; - - if (!xdp_on) - return NULL; - - return xdp_get_umem_from_qid(ring->vsi->netdev, qid); -} - int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate); int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2573,8 +2573,7 @@ static int i40e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return -EOPNOTSUPP; /* only magic packet is supported */ - if (wol->wolopts && (wol->wolopts != WAKE_MAGIC) - | (wol->wolopts != WAKE_FILTER)) + if (wol->wolopts & ~WAKE_MAGIC) return -EOPNOTSUPP; /* is this a new value? */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3064,6 +3064,26 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) } /** + * i40e_xsk_umem - Retrieve the AF_XDP ZC if XDP and ZC is enabled + * @ring: The Tx or Rx ring + * + * Returns the UMEM or NULL. + **/ +static struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring) +{ + bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi); + int qid = ring->queue_index; + + if (ring_is_xdp(ring)) + qid -= ring->vsi->alloc_queue_pairs; + + if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps)) + return NULL; + + return xdp_get_umem_from_qid(ring->vsi->netdev, qid); +} + +/** * i40e_configure_tx_ring - Configure a transmit ring context and rest * @ring: The Tx ring to configure * @@ -10064,6 +10084,12 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) hash_init(vsi->mac_filter_hash); vsi->irqs_ready = false; + if (type == I40E_VSI_MAIN) { + vsi->af_xdp_zc_qps = bitmap_zalloc(pf->num_lan_qps, GFP_KERNEL); + if (!vsi->af_xdp_zc_qps) + goto err_rings; + } + ret = i40e_set_num_rings_in_vsi(vsi); if (ret) goto err_rings; @@ -10082,6 +10108,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) goto unlock_pf; err_rings: + bitmap_free(vsi->af_xdp_zc_qps); pf->next_vsi = i - 1; kfree(vsi); unlock_pf: @@ -10162,6 +10189,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx); i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx); + bitmap_free(vsi->af_xdp_zc_qps); i40e_vsi_free_arrays(vsi, true); i40e_clear_rss_config_user(vsi); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -146,12 +146,13 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); - struct timespec64 now; + struct timespec64 now, then; + then = ns_to_timespec64(delta); mutex_lock(&pf->tmreg_lock); i40e_ptp_read(pf, &now, NULL); - timespec64_add_ns(&now, delta); + now = timespec64_add(now, then); i40e_ptp_write(pf, (const struct timespec64 *)&now); mutex_unlock(&pf->tmreg_lock); diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -102,6 +102,8 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, if (err) return err; + set_bit(qid, vsi->af_xdp_zc_qps); + if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi); if (if_running) { @@ -148,6 +150,7 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) return err; } + clear_bit(qid, vsi->af_xdp_zc_qps); i40e_xsk_umem_dma_unmap(vsi, umem); if (if_running) { diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -194,6 +194,8 @@ /* enable link status from external LINK_0 and LINK_1 pins */ #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ +#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ +#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 /* PHY PM enable */ #define E1000_CTRL_SDP0_DIR 0x00400000 /* SDP0 Data direction */ #define E1000_CTRL_SDP1_DIR 0x00800000 /* SDP1 Data direction */ #define E1000_CTRL_RST 0x04000000 /* Global reset */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8740,9 +8740,7 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, struct e1000_hw *hw = &adapter->hw; u32 ctrl, rctl, status; u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; -#ifdef CONFIG_PM - int retval = 0; -#endif + bool wake; rtnl_lock(); netif_device_detach(netdev); @@ -8755,14 +8753,6 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, igb_clear_interrupt_scheme(adapter); rtnl_unlock(); -#ifdef CONFIG_PM - if (!runtime) { - retval = pci_save_state(pdev); - if (retval) - return retval; - } -#endif - status = rd32(E1000_STATUS); if (status & E1000_STATUS_LU) wufc &= ~E1000_WUFC_LNKC; @@ -8779,10 +8769,6 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, } ctrl = rd32(E1000_CTRL); - /* advertise wake from D3Cold */ - #define E1000_CTRL_ADVD3WUC 0x00100000 - /* phy power management enable */ - #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 ctrl |= E1000_CTRL_ADVD3WUC; wr32(E1000_CTRL, ctrl); @@ -8796,12 +8782,15 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, wr32(E1000_WUFC, 0); } - *enable_wake = wufc || adapter->en_mng_pt; - if (!*enable_wake) + wake = wufc || adapter->en_mng_pt; + if (!wake) igb_power_down_link(adapter); else igb_power_up_link(adapter); + if (enable_wake) + *enable_wake = wake; + /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. */ @@ -8844,22 +8833,7 @@ static void igb_deliver_wake_packet(struct net_device *netdev) static int __maybe_unused igb_suspend(struct device *dev) { - int retval; - bool wake; - struct pci_dev *pdev = to_pci_dev(dev); - - retval = __igb_shutdown(pdev, &wake, 0); - if (retval) - return retval; - - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } - - return 0; + return __igb_shutdown(to_pci_dev(dev), NULL, 0); } static int __maybe_unused igb_resume(struct device *dev) @@ -8930,22 +8904,7 @@ static int __maybe_unused igb_runtime_idle(struct device *dev) static int __maybe_unused igb_runtime_suspend(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - int retval; - bool wake; - - retval = __igb_shutdown(pdev, &wake, 1); - if (retval) - return retval; - - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } - - return 0; + return __igb_shutdown(to_pci_dev(dev), NULL, 1); } static int __maybe_unused igb_runtime_resume(struct device *dev) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -905,13 +905,12 @@ s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw) struct pci_dev *pdev = adapter->pdev; struct device *dev = &adapter->netdev->dev; struct mii_bus *bus; + int err = -ENODEV; - adapter->mii_bus = devm_mdiobus_alloc(dev); - if (!adapter->mii_bus) + bus = devm_mdiobus_alloc(dev); + if (!bus) return -ENOMEM; - bus = adapter->mii_bus; - switch (hw->device_id) { /* C3000 SoCs */ case IXGBE_DEV_ID_X550EM_A_KR: @@ -949,12 +948,15 @@ s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw) */ hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22; - return mdiobus_register(bus); + err = mdiobus_register(bus); + if (!err) { + adapter->mii_bus = bus; + return 0; + } ixgbe_no_mii_bus: devm_mdiobus_free(dev, bus); - adapter->mii_bus = NULL; - return -ENODEV; + return err; } /** diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -96,9 +96,6 @@ int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, if (!eproto) return -EINVAL; - if (ext != MLX5_CAP_PCAM_FEATURE(dev, ptys_extended_ethernet)) - return -EOPNOTSUPP; - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -122,7 +122,9 @@ out: return err; } -/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) */ +/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) + * minimum speed value is 40Gbps + */ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) { u32 speed; @@ -130,10 +132,9 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) int err; err = mlx5e_port_linkspeed(priv->mdev, &speed); - if (err) { - mlx5_core_warn(priv->mdev, "cannot get port speed\n"); - return 0; - } + if (err) + speed = SPEED_40000; + speed = max_t(u32, speed, SPEED_40000); xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; @@ -142,7 +143,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) } static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, - u32 xoff, unsigned int mtu) + u32 xoff, unsigned int max_mtu) { int i; @@ -154,11 +155,12 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, } if (port_buffer->buffer[i].size < - (xoff + mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) + (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) return -ENOMEM; port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff; - port_buffer->buffer[i].xon = port_buffer->buffer[i].xoff - mtu; + port_buffer->buffer[i].xon = + port_buffer->buffer[i].xoff - max_mtu; } return 0; @@ -166,7 +168,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, /** * update_buffer_lossy() - * mtu: device's MTU + * max_mtu: netdev's max_mtu * pfc_en: <input> current pfc configuration * buffer: <input> current prio to buffer mapping * xoff: <input> xoff value @@ -183,7 +185,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, * Return 0 if no error. * Set change to true if buffer configuration is modified. */ -static int update_buffer_lossy(unsigned int mtu, +static int update_buffer_lossy(unsigned int max_mtu, u8 pfc_en, u8 *buffer, u32 xoff, struct mlx5e_port_buffer *port_buffer, bool *change) @@ -220,7 +222,7 @@ static int update_buffer_lossy(unsigned int mtu, } if (changed) { - err = update_xoff_threshold(port_buffer, xoff, mtu); + err = update_xoff_threshold(port_buffer, xoff, max_mtu); if (err) return err; @@ -230,6 +232,7 @@ static int update_buffer_lossy(unsigned int mtu, return 0; } +#define MINIMUM_MAX_MTU 9216 int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u32 change, unsigned int mtu, struct ieee_pfc *pfc, @@ -241,12 +244,14 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, bool update_prio2buffer = false; u8 buffer[MLX5E_MAX_PRIORITY]; bool update_buffer = false; + unsigned int max_mtu; u32 total_used = 0; u8 curr_pfc_en; int err; int i; mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change); + max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU); err = mlx5e_port_query_buffer(priv, &port_buffer); if (err) @@ -254,7 +259,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_CABLE_LEN) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } @@ -264,7 +269,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(mtu, pfc->pfc_en, buffer, xoff, + err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, &port_buffer, &update_buffer); if (err) return err; @@ -276,8 +281,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(mtu, curr_pfc_en, prio2buffer, xoff, - &port_buffer, &update_buffer); + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, + xoff, &port_buffer, &update_buffer); if (err) return err; } @@ -301,7 +306,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, return -EINVAL; update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } @@ -309,7 +314,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, /* Need to update buffer configuration if xoff value is changed */ if (!update_buffer && xoff != priv->dcbx.xoff) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -45,7 +45,9 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, if (err) return err; + mutex_lock(&mdev->mlx5e_res.td.list_lock); list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); return 0; } @@ -53,8 +55,10 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir) { + mutex_lock(&mdev->mlx5e_res.td.list_lock); mlx5_core_destroy_tir(mdev, tir->tirn); list_del(&tir->list); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); } static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, @@ -114,6 +118,7 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) } INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); + mutex_init(&mdev->mlx5e_res.td.list_lock); return 0; @@ -141,15 +146,17 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_tir *tir; - int err = -ENOMEM; + int err = 0; u32 tirn = 0; int inlen; void *in; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + if (!in) { + err = -ENOMEM; goto out; + } if (enable_uc_lb) MLX5_SET(modify_tir_in, in, ctx.self_lb_block, @@ -157,6 +164,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + mutex_lock(&mdev->mlx5e_res.td.list_lock); list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { tirn = tir->tirn; err = mlx5_core_modify_tir(mdev, tirn, in, inlen); @@ -168,6 +176,7 @@ out: kvfree(in); if (err) netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -603,16 +603,18 @@ static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static void ptys2ethtool_adver_link(struct mlx5_core_dev *mdev, - unsigned long *advertising_modes, - u32 eth_proto_cap) +static void ptys2ethtool_adver_link(unsigned long *advertising_modes, + u32 eth_proto_cap, bool ext) { unsigned long proto_cap = eth_proto_cap; struct ptys2ethtool_config *table; u32 max_size; int proto; - mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size); + table = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; + max_size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : + ARRAY_SIZE(ptys2legacy_ethtool_table); + for_each_set_bit(proto, &proto_cap, max_size) bitmap_or(advertising_modes, advertising_modes, table[proto].advertised, @@ -794,12 +796,12 @@ static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap, ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); } -static void get_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_cap, - u8 tx_pause, u8 rx_pause, - struct ethtool_link_ksettings *link_ksettings) +static void get_advertising(u32 eth_proto_cap, u8 tx_pause, u8 rx_pause, + struct ethtool_link_ksettings *link_ksettings, + bool ext) { unsigned long *advertising = link_ksettings->link_modes.advertising; - ptys2ethtool_adver_link(mdev, advertising, eth_proto_cap); + ptys2ethtool_adver_link(advertising, eth_proto_cap, ext); if (rx_pause) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); @@ -854,8 +856,9 @@ static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, struct ethtool_link_ksettings *link_ksettings) { unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); - ptys2ethtool_adver_link(mdev, lp_advertising, eth_proto_lp); + ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); } int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, @@ -872,6 +875,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, u8 an_disable_admin; u8 an_status; u8 connector_type; + bool admin_ext; bool ext; int err; @@ -886,6 +890,19 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, eth_proto_capability); eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + /* Fields: eth_proto_admin and ext_eth_proto_admin are + * mutually exclusive. Hence try reading legacy advertising + * when extended advertising is zero. + * admin_ext indicates how eth_proto_admin should be + * interpreted + */ + admin_ext = ext; + if (ext && !eth_proto_admin) { + eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, false, + eth_proto_admin); + admin_ext = false; + } + eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); @@ -899,7 +916,8 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); get_supported(mdev, eth_proto_cap, link_ksettings); - get_advertising(mdev, eth_proto_admin, tx_pause, rx_pause, link_ksettings); + get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings, + admin_ext); get_speed_duplex(priv->netdev, eth_proto_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; @@ -997,19 +1015,17 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, #define MLX5E_PTYS_EXT ((1ULL << ETHTOOL_LINK_MODE_50000baseKR_Full_BIT) - 1) - ext_requested = (link_ksettings->link_modes.advertising[0] > - MLX5E_PTYS_EXT); + ext_requested = !!(link_ksettings->link_modes.advertising[0] > + MLX5E_PTYS_EXT || + link_ksettings->link_modes.advertising[1]); ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); - - /*when ptys_extended_ethernet is set legacy link modes are deprecated */ - if (ext_requested != ext_supported) - return -EPROTONOSUPPORT; + ext_requested &= ext_supported; speed = link_ksettings->base.speed; ethtool2ptys_adver_func = ext_requested ? mlx5e_ethtool2ptys_ext_adver_link : mlx5e_ethtool2ptys_adver_link; - err = mlx5_port_query_eth_proto(mdev, 1, ext_supported, &eproto); + err = mlx5_port_query_eth_proto(mdev, 1, ext_requested, &eproto); if (err) { netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); @@ -1037,7 +1053,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_supported); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_requested); mlx5_toggle_port_link(mdev); out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2158,6 +2158,52 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, return true; } +struct ip_ttl_word { + __u8 ttl; + __u8 protocol; + __sum16 check; +}; + +struct ipv6_hoplimit_word { + __be16 payload_len; + __u8 nexthdr; + __u8 hop_limit; +}; + +static bool is_action_keys_supported(const struct flow_action_entry *act) +{ + u32 mask, offset; + u8 htype; + + htype = act->mangle.htype; + offset = act->mangle.offset; + mask = ~act->mangle.mask; + /* For IPv4 & IPv6 header check 4 byte word, + * to determine that modified fields + * are NOT ttl & hop_limit only. + */ + if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { + struct ip_ttl_word *ttl_word = + (struct ip_ttl_word *)&mask; + + if (offset != offsetof(struct iphdr, ttl) || + ttl_word->protocol || + ttl_word->check) { + return true; + } + } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + struct ipv6_hoplimit_word *hoplimit_word = + (struct ipv6_hoplimit_word *)&mask; + + if (offset != offsetof(struct ipv6hdr, payload_len) || + hoplimit_word->payload_len || + hoplimit_word->nexthdr) { + return true; + } + } + return false; +} + static bool modify_header_match_supported(struct mlx5_flow_spec *spec, struct flow_action *flow_action, u32 actions, @@ -2165,9 +2211,9 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, { const struct flow_action_entry *act; bool modify_ip_header; - u8 htype, ip_proto; void *headers_v; u16 ethertype; + u8 ip_proto; int i; if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) @@ -2187,9 +2233,7 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, act->id != FLOW_ACTION_ADD) continue; - htype = act->mangle.htype; - if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4 || - htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + if (is_action_keys_supported(act)) { modify_ip_header = true; break; } @@ -2340,15 +2384,22 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, return 0; } -static inline int cmp_encap_info(struct ip_tunnel_key *a, - struct ip_tunnel_key *b) +struct encap_key { + struct ip_tunnel_key *ip_tun_key; + int tunnel_type; +}; + +static inline int cmp_encap_info(struct encap_key *a, + struct encap_key *b) { - return memcmp(a, b, sizeof(*a)); + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || + a->tunnel_type != b->tunnel_type; } -static inline int hash_encap_info(struct ip_tunnel_key *key) +static inline int hash_encap_info(struct encap_key *key) { - return jhash(key, sizeof(*key), 0); + return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), + key->tunnel_type); } @@ -2379,7 +2430,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; struct ip_tunnel_info *tun_info; - struct ip_tunnel_key *key; + struct encap_key key, e_key; struct mlx5e_encap_entry *e; unsigned short family; uintptr_t hash_key; @@ -2389,13 +2440,16 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; tun_info = &parse_attr->tun_info[out_index]; family = ip_tunnel_info_af(tun_info); - key = &tun_info->key; + key.ip_tun_key = &tun_info->key; + key.tunnel_type = mlx5e_tc_tun_get_type(mirred_dev); - hash_key = hash_encap_info(key); + hash_key = hash_encap_info(&key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - if (!cmp_encap_info(&e->tun_info.key, key)) { + e_key.ip_tun_key = &e->tun_info.key; + e_key.tunnel_type = e->tunnel_type; + if (!cmp_encap_info(&e_key, &key)) { found = true; break; } @@ -2657,7 +2711,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { - err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, + err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB, parse_attr, hdrs, extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -105,8 +105,7 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -134,8 +133,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(modify_esw_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } @@ -431,6 +429,8 @@ static int esw_create_legacy_table(struct mlx5_eswitch *esw) { int err; + memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb)); + err = esw_create_legacy_vepa_table(esw); if (err) return err; @@ -2157,6 +2157,7 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, /* Star rule to forward all traffic to uplink vport */ memset(spec, 0, sizeof(*spec)); + memset(&dest, 0, sizeof(dest)); dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport.num = MLX5_VPORT_UPLINK; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1611,6 +1611,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) { int err; + memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); err = esw_create_offloads_fdb_tables(esw, nvports); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -217,15 +217,21 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, void *cmd; int ret; + rcu_read_lock(); + flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); + rcu_read_unlock(); + + if (!flow) { + WARN_ONCE(1, "Received NULL pointer for handle\n"); + return -EINVAL; + } + buf = kzalloc(size, GFP_ATOMIC); if (!buf) return -ENOMEM; cmd = (buf + 1); - rcu_read_lock(); - flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); - rcu_read_unlock(); mlx5_fpga_tls_flow_to_cmd(flow, cmd); MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); @@ -238,6 +244,8 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, buf->complete = mlx_tls_kfree_complete; ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf); + if (ret < 0) + kfree(buf); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -164,26 +164,6 @@ static struct mlx5_profile profile[] = { .size = 8, .limit = 4 }, - .mr_cache[16] = { - .size = 8, - .limit = 4 - }, - .mr_cache[17] = { - .size = 8, - .limit = 4 - }, - .mr_cache[18] = { - .size = 8, - .limit = 4 - }, - .mr_cache[19] = { - .size = 4, - .limit = 2 - }, - .mr_cache[20] = { - .size = 4, - .limit = 2 - }, }, }; diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -48,8 +48,7 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, tmp_push_vlan_tci = FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, act->vlan.prio) | - FIELD_PREP(NFP_FL_PUSH_VLAN_VID, act->vlan.vid) | - NFP_FL_PUSH_VLAN_CFI; + FIELD_PREP(NFP_FL_PUSH_VLAN_VID, act->vlan.vid); push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); } diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -26,7 +26,7 @@ #define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) -#define NFP_FLOWER_MASK_VLAN_CFI BIT(12) +#define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12) #define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0) #define NFP_FLOWER_MASK_MPLS_LB GENMASK(31, 12) @@ -82,7 +82,6 @@ #define NFP_FL_OUT_FLAGS_TYPE_IDX GENMASK(2, 0) #define NFP_FL_PUSH_VLAN_PRIO GENMASK(15, 13) -#define NFP_FL_PUSH_VLAN_CFI BIT(12) #define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) #define IPV6_FLOW_LABEL_MASK cpu_to_be32(0x000fffff) diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -30,20 +30,19 @@ nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, flow_rule_match_vlan(rule, &match); /* Populate the tci field. */ - if (match.key->vlan_id || match.key->vlan_priority) { - tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, - match.key->vlan_priority) | - FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, - match.key->vlan_id) | - NFP_FLOWER_MASK_VLAN_CFI; - ext->tci = cpu_to_be16(tmp_tci); - tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, - match.mask->vlan_priority) | - FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, - match.mask->vlan_id) | - NFP_FLOWER_MASK_VLAN_CFI; - msk->tci = cpu_to_be16(tmp_tci); - } + tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + match.key->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + match.key->vlan_id); + ext->tci = cpu_to_be16(tmp_tci); + + tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + match.mask->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + match.mask->vlan_id); + msk->tci = cpu_to_be16(tmp_tci); } } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -195,7 +195,7 @@ static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) ret = dev_queue_xmit(skb); nfp_repr_inc_tx_stats(netdev, len, ret); - return ret; + return NETDEV_TX_OK; } static int nfp_repr_stop(struct net_device *netdev) @@ -383,7 +383,7 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); netdev->gso_max_segs = NFP_NET_LSO_MAX_SEGS; - netdev->priv_flags |= IFF_NO_QUEUE; + netdev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; netdev->features |= NETIF_F_LLTX; if (nfp_app_has_tc(app)) { diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c @@ -5460,7 +5460,7 @@ static void rtl_hw_start_8168(struct rtl8169_private *tp) tp->cp_cmd |= PktCntrDisable | INTT_1; RTL_W16(tp, CPlusCmd, tp->cp_cmd); - RTL_W16(tp, IntrMitigate, 0x5151); + RTL_W16(tp, IntrMitigate, 0x5100); /* Work around for RxFIFO overflow. */ if (tp->mac_version == RTL_GIGA_MAC_VER_11) { diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h @@ -29,11 +29,13 @@ /* Specific functions used for Ring mode */ /* Enhanced descriptors */ -static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end) +static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end, + int bfsize) { - p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - << ERDES1_BUFFER2_SIZE_SHIFT) - & ERDES1_BUFFER2_SIZE_MASK); + if (bfsize == BUF_SIZE_16KiB) + p->des1 |= cpu_to_le32((BUF_SIZE_8KiB + << ERDES1_BUFFER2_SIZE_SHIFT) + & ERDES1_BUFFER2_SIZE_MASK); if (end) p->des1 |= cpu_to_le32(ERDES1_END_RING); @@ -59,11 +61,15 @@ static inline void enh_set_tx_desc_len_on_ring(struct dma_desc *p, int len) } /* Normal descriptors */ -static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end) +static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end, int bfsize) { - p->des1 |= cpu_to_le32(((BUF_SIZE_2KiB - 1) - << RDES1_BUFFER2_SIZE_SHIFT) - & RDES1_BUFFER2_SIZE_MASK); + if (bfsize >= BUF_SIZE_2KiB) { + int bfsize2; + + bfsize2 = min(bfsize - BUF_SIZE_2KiB + 1, BUF_SIZE_2KiB - 1); + p->des1 |= cpu_to_le32((bfsize2 << RDES1_BUFFER2_SIZE_SHIFT) + & RDES1_BUFFER2_SIZE_MASK); + } if (end) p->des1 |= cpu_to_le32(RDES1_END_RING); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -296,7 +296,7 @@ exit: } static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { dwmac4_set_rx_owner(p, disable_rx_ic); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -123,7 +123,7 @@ static int dwxgmac2_get_rx_timestamp_status(void *desc, void *next_desc, } static void dwxgmac2_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { dwxgmac2_set_rx_owner(p, disable_rx_ic); } diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -201,6 +201,11 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, if (unlikely(rdes0 & RDES0_OWN)) return dma_own; + if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) { + stats->rx_length_errors++; + return discard_frame; + } + if (unlikely(rdes0 & RDES0_ERROR_SUMMARY)) { if (unlikely(rdes0 & RDES0_DESCRIPTOR_ERROR)) { x->rx_desc++; @@ -231,9 +236,10 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, * It doesn't match with the information reported into the databook. * At any rate, we need to understand if the CSUM hw computation is ok * and report this info to the upper layers. */ - ret = enh_desc_coe_rdes0(!!(rdes0 & RDES0_IPC_CSUM_ERROR), - !!(rdes0 & RDES0_FRAME_TYPE), - !!(rdes0 & ERDES0_RX_MAC_ADDR)); + if (likely(ret == good_frame)) + ret = enh_desc_coe_rdes0(!!(rdes0 & RDES0_IPC_CSUM_ERROR), + !!(rdes0 & RDES0_FRAME_TYPE), + !!(rdes0 & ERDES0_RX_MAC_ADDR)); if (unlikely(rdes0 & RDES0_DRIBBLING)) x->dribbling_bit++; @@ -259,15 +265,19 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, } static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { + int bfsize1; + p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK); + + bfsize1 = min(bfsize, BUF_SIZE_8KiB); + p->des1 |= cpu_to_le32(bfsize1 & ERDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ehn_desc_rx_set_on_chain(p); else - ehn_desc_rx_set_on_ring(p, end); + ehn_desc_rx_set_on_ring(p, end, bfsize); if (disable_rx_ic) p->des1 |= cpu_to_le32(ERDES1_DISABLE_IC); diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -33,7 +33,7 @@ struct dma_extended_desc; struct stmmac_desc_ops { /* DMA RX descriptor ring initialization */ void (*init_rx_desc)(struct dma_desc *p, int disable_rx_ic, int mode, - int end); + int end, int bfsize); /* DMA TX descriptor ring initialization */ void (*init_tx_desc)(struct dma_desc *p, int mode, int end); /* Invoked by the xmit function to prepare the tx descriptor */ diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -91,8 +91,6 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, return dma_own; if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) { - pr_warn("%s: Oversized frame spanned multiple buffers\n", - __func__); stats->rx_length_errors++; return discard_frame; } @@ -135,15 +133,19 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, } static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, - int end) + int end, int bfsize) { + int bfsize1; + p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32((BUF_SIZE_2KiB - 1) & RDES1_BUFFER1_SIZE_MASK); + + bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1); + p->des1 |= cpu_to_le32(bfsize & RDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ndesc_rx_set_on_chain(p, end); else - ndesc_rx_set_on_ring(p, end); + ndesc_rx_set_on_ring(p, end, bfsize); if (disable_rx_ic) p->des1 |= cpu_to_le32(RDES1_DISABLE_IC); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1136,11 +1136,13 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue) if (priv->extend_desc) stmmac_init_rx_desc(priv, &rx_q->dma_erx[i].basic, priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1)); + (i == DMA_RX_SIZE - 1), + priv->dma_buf_sz); else stmmac_init_rx_desc(priv, &rx_q->dma_rx[i], priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1)); + (i == DMA_RX_SIZE - 1), + priv->dma_buf_sz); } /** @@ -3352,9 +3354,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; struct stmmac_channel *ch = &priv->channel[queue]; - unsigned int entry = rx_q->cur_rx; + unsigned int next_entry = rx_q->cur_rx; int coe = priv->hw->rx_csum; - unsigned int next_entry; unsigned int count = 0; bool xmac; @@ -3372,10 +3373,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true); } while (count < limit) { - int status; + int entry, status; struct dma_desc *p; struct dma_desc *np; + entry = next_entry; + if (priv->extend_desc) p = (struct dma_desc *)(rx_q->dma_erx + entry); else @@ -3431,11 +3434,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) * ignored */ if (frame_len > priv->dma_buf_sz) { - netdev_err(priv->dev, - "len %d larger than size (%d)\n", - frame_len, priv->dma_buf_sz); + if (net_ratelimit()) + netdev_err(priv->dev, + "len %d larger than size (%d)\n", + frame_len, priv->dma_buf_sz); priv->dev->stats.rx_length_errors++; - break; + continue; } /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 @@ -3470,7 +3474,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dev_warn(priv->device, "packet dropped\n"); priv->dev->stats.rx_dropped++; - break; + continue; } dma_sync_single_for_cpu(priv->device, @@ -3490,11 +3494,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) } else { skb = rx_q->rx_skbuff[entry]; if (unlikely(!skb)) { - netdev_err(priv->dev, - "%s: Inconsistent Rx chain\n", - priv->dev->name); + if (net_ratelimit()) + netdev_err(priv->dev, + "%s: Inconsistent Rx chain\n", + priv->dev->name); priv->dev->stats.rx_dropped++; - break; + continue; } prefetch(skb->data - NET_IP_ALIGN); rx_q->rx_skbuff[entry] = NULL; @@ -3529,7 +3534,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += frame_len; } - entry = next_entry; } stmmac_rx_refill(priv, queue); diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h @@ -987,6 +987,7 @@ struct netvsc_device { wait_queue_head_t wait_drain; bool destroy; + bool tx_disable; /* if true, do not wake up queue again */ /* Receive buffer allocated by us but manages by NetVSP */ void *recv_buf; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c @@ -110,6 +110,7 @@ static struct netvsc_device *alloc_net_device(void) init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; + net_device->tx_disable = false; net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; @@ -719,7 +720,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev, } else { struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx); - if (netif_tx_queue_stopped(txq) && + if (netif_tx_queue_stopped(txq) && !net_device->tx_disable && (hv_get_avail_to_write_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) { netif_tx_wake_queue(txq); @@ -874,7 +875,8 @@ static inline int netvsc_send_pkt( } else if (ret == -EAGAIN) { netif_tx_stop_queue(txq); ndev_ctx->eth_stats.stop_queue++; - if (atomic_read(&nvchan->queue_sends) < 1) { + if (atomic_read(&nvchan->queue_sends) < 1 && + !net_device->tx_disable) { netif_tx_wake_queue(txq); ndev_ctx->eth_stats.wake_queue++; ret = -ENOSPC; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c @@ -109,6 +109,15 @@ static void netvsc_set_rx_mode(struct net_device *net) rcu_read_unlock(); } +static void netvsc_tx_enable(struct netvsc_device *nvscdev, + struct net_device *ndev) +{ + nvscdev->tx_disable = false; + virt_wmb(); /* ensure queue wake up mechanism is on */ + + netif_tx_wake_all_queues(ndev); +} + static int netvsc_open(struct net_device *net) { struct net_device_context *ndev_ctx = netdev_priv(net); @@ -129,7 +138,7 @@ static int netvsc_open(struct net_device *net) rdev = nvdev->extension; if (!rdev->link_state) { netif_carrier_on(net); - netif_tx_wake_all_queues(net); + netvsc_tx_enable(nvdev, net); } if (vf_netdev) { @@ -184,6 +193,17 @@ static int netvsc_wait_until_empty(struct netvsc_device *nvdev) } } +static void netvsc_tx_disable(struct netvsc_device *nvscdev, + struct net_device *ndev) +{ + if (nvscdev) { + nvscdev->tx_disable = true; + virt_wmb(); /* ensure txq will not wake up after stop */ + } + + netif_tx_disable(ndev); +} + static int netvsc_close(struct net_device *net) { struct net_device_context *net_device_ctx = netdev_priv(net); @@ -192,7 +212,7 @@ static int netvsc_close(struct net_device *net) struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); int ret; - netif_tx_disable(net); + netvsc_tx_disable(nvdev, net); /* No need to close rndis filter if it is removed already */ if (!nvdev) @@ -920,7 +940,7 @@ static int netvsc_detach(struct net_device *ndev, /* If device was up (receiving) then shutdown */ if (netif_running(ndev)) { - netif_tx_disable(ndev); + netvsc_tx_disable(nvdev, ndev); ret = rndis_filter_close(nvdev); if (ret) { @@ -1908,7 +1928,7 @@ static void netvsc_link_change(struct work_struct *w) if (rdev->link_state) { rdev->link_state = false; netif_carrier_on(net); - netif_tx_wake_all_queues(net); + netvsc_tx_enable(net_device, net); } else { notify = true; } @@ -1918,7 +1938,7 @@ static void netvsc_link_change(struct work_struct *w) if (!rdev->link_state) { rdev->link_state = true; netif_carrier_off(net); - netif_tx_stop_all_queues(net); + netvsc_tx_disable(net_device, net); } kfree(event); break; @@ -1927,7 +1947,7 @@ static void netvsc_link_change(struct work_struct *w) if (!rdev->link_state) { rdev->link_state = true; netif_carrier_off(net); - netif_tx_stop_all_queues(net); + netvsc_tx_disable(net_device, net); event->event = RNDIS_STATUS_MEDIA_CONNECT; spin_lock_irqsave(&ndev_ctx->lock, flags); list_add(&event->list, &ndev_ctx->reconfig_events); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c @@ -1203,6 +1203,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ {QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */ + {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */ {QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c @@ -1273,6 +1273,7 @@ static void vrf_setup(struct net_device *dev) /* default to no qdisc; user can add if desired */ dev->priv_flags |= IFF_NO_QUEUE; + dev->priv_flags |= IFF_NO_RX_HANDLER; dev->min_mtu = 0; dev->max_mtu = 0; diff --git a/include/linux/mii.h b/include/linux/mii.h @@ -469,7 +469,7 @@ static inline u32 linkmode_adv_to_lcl_adv_t(unsigned long *advertising) if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising)) lcl_adv |= ADVERTISE_PAUSE_CAP; - if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising)) lcl_adv |= ADVERTISE_PAUSE_ASYM; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h @@ -594,6 +594,8 @@ enum mlx5_pagefault_type_flags { }; struct mlx5_td { + /* protects tirs list changes while tirs refresh */ + struct mutex list_lock; struct list_head tirs_list; u32 tdn; }; diff --git a/include/net/ip.h b/include/net/ip.h @@ -677,7 +677,7 @@ int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, unsigned char __user *data, int optlen); void ip_options_undo(struct ip_options *opt); void ip_forward_options(struct sk_buff *skb); -int ip_options_rcv_srr(struct sk_buff *skb); +int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); /* * Functions provided by ip_sockglue.c diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h @@ -59,6 +59,7 @@ struct net { */ spinlock_t rules_mod_lock; + u32 hash_mix; atomic64_t cookie_gen; struct list_head list; /* list of network namespaces */ diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h @@ -2,16 +2,10 @@ #ifndef __NET_NS_HASH_H__ #define __NET_NS_HASH_H__ -#include <asm/cache.h> - -struct net; +#include <net/net_namespace.h> static inline u32 net_hash_mix(const struct net *net) { -#ifdef CONFIG_NET_NS - return (u32)(((unsigned long)net) >> ilog2(sizeof(*net))); -#else - return 0; -#endif + return net->hash_mix; } #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h @@ -923,6 +923,41 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch) sch->qstats.overlimits++; } +static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch) +{ + __u32 qlen = qdisc_qlen_sum(sch); + + return gnet_stats_copy_queue(d, sch->cpu_qstats, &sch->qstats, qlen); +} + +static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, + __u32 *backlog) +{ + struct gnet_stats_queue qstats = { 0 }; + __u32 len = qdisc_qlen_sum(sch); + + __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len); + *qlen = qstats.qlen; + *backlog = qstats.backlog; +} + +static inline void qdisc_tree_flush_backlog(struct Qdisc *sch) +{ + __u32 qlen, backlog; + + qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); + qdisc_tree_reduce_backlog(sch, qlen, backlog); +} + +static inline void qdisc_purge_queue(struct Qdisc *sch) +{ + __u32 qlen, backlog; + + qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); + qdisc_reset(sch); + qdisc_tree_reduce_backlog(sch, qlen, backlog); +} + static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) { qh->head = NULL; @@ -1106,13 +1141,8 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, sch_tree_lock(sch); old = *pold; *pold = new; - if (old != NULL) { - unsigned int qlen = old->q.qlen; - unsigned int backlog = old->qstats.backlog; - - qdisc_reset(old); - qdisc_tree_reduce_backlog(old, qlen, backlog); - } + if (old != NULL) + qdisc_tree_flush_backlog(old); sch_tree_unlock(sch); return old; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c @@ -162,10 +162,14 @@ static void cpu_map_kthread_stop(struct work_struct *work) static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { + unsigned int hard_start_headroom; unsigned int frame_size; void *pkt_data_start; struct sk_buff *skb; + /* Part of headroom was reserved to xdpf */ + hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom; + /* build_skb need to place skb_shared_info after SKB end, and * also want to know the memory "truesize". Thus, need to * know the memory frame size backing xdp_buff. @@ -183,15 +187,15 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, * is not at a fixed memory location, with mixed length * packets, which is bad for cache-line hotness. */ - frame_size = SKB_DATA_ALIGN(xdpf->len + xdpf->headroom) + + frame_size = SKB_DATA_ALIGN(xdpf->len + hard_start_headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - pkt_data_start = xdpf->data - xdpf->headroom; + pkt_data_start = xdpf->data - hard_start_headroom; skb = build_skb(pkt_data_start, frame_size); if (!skb) return NULL; - skb_reserve(skb, xdpf->headroom); + skb_reserve(skb, hard_start_headroom); __skb_put(skb, xdpf->len); if (xdpf->metasize) skb_metadata_set(skb, xdpf->metasize); @@ -205,6 +209,9 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, * - RX ring dev queue index (skb_record_rx_queue) */ + /* Allow SKB to reuse area used by xdp_frame */ + xdp_scrub_frame(xdpf); + return skb; } diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c @@ -554,19 +554,6 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ } EXPORT_SYMBOL(bpf_prog_get_type_path); -static void bpf_evict_inode(struct inode *inode) -{ - enum bpf_type type; - - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); - - if (S_ISLNK(inode->i_mode)) - kfree(inode->i_link); - if (!bpf_inode_type(inode, &type)) - bpf_any_put(inode->i_private, type); -} - /* * Display the mount options in /proc/mounts. */ @@ -579,11 +566,28 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) return 0; } +static void bpf_destroy_inode_deferred(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + enum bpf_type type; + + if (S_ISLNK(inode->i_mode)) + kfree(inode->i_link); + if (!bpf_inode_type(inode, &type)) + bpf_any_put(inode->i_private, type); + free_inode_nonrcu(inode); +} + +static void bpf_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred); +} + static const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, - .evict_inode = bpf_evict_inode, + .destroy_inode = bpf_destroy_inode, }; enum { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c @@ -1897,8 +1897,9 @@ continue_func: } frame++; if (frame >= MAX_CALL_FRAMES) { - WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); - return -EFAULT; + verbose(env, "the call stack of %d frames is too deep !\n", + frame); + return -E2BIG; } goto process_func; } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c @@ -443,27 +443,29 @@ static int vlan_dev_fcoe_disable(struct net_device *dev) return rc; } -static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) +static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, + struct scatterlist *sgl, unsigned int sgc) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; - int rc = -EINVAL; + int rc = 0; + + if (ops->ndo_fcoe_ddp_target) + rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc); - if (ops->ndo_fcoe_get_wwn) - rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); return rc; } +#endif -static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, - struct scatterlist *sgl, unsigned int sgc) +#ifdef NETDEV_FCOE_WWNN +static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; - int rc = 0; - - if (ops->ndo_fcoe_ddp_target) - rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc); + int rc = -EINVAL; + if (ops->ndo_fcoe_get_wwn) + rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); return rc; } #endif @@ -794,9 +796,11 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, .ndo_fcoe_enable = vlan_dev_fcoe_enable, .ndo_fcoe_disable = vlan_dev_fcoe_disable, - .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, .ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target, #endif +#ifdef NETDEV_FCOE_WWNN + .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, +#endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = vlan_dev_poll_controller, .ndo_netpoll_setup = vlan_dev_netpoll_setup, diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c @@ -104,8 +104,10 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo); - /* free the TID stats immediately */ - cfg80211_sinfo_release_content(&sinfo); + if (!ret) { + /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + } dev_put(real_netdev); if (ret == -ENOENT) { diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c @@ -803,6 +803,8 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, const u8 *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; + struct batadv_bla_claim *claim_removed_entry; + struct hlist_node *claim_removed_node; ether_addr_copy(search_claim.addr, mac); search_claim.vid = vid; @@ -813,10 +815,18 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): %pM, vid %d\n", __func__, mac, batadv_print_vid(vid)); - batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, - batadv_choose_claim, claim); - batadv_claim_put(claim); /* reference from the hash is gone */ + claim_removed_node = batadv_hash_remove(bat_priv->bla.claim_hash, + batadv_compare_claim, + batadv_choose_claim, claim); + if (!claim_removed_node) + goto free_claim; + /* reference from the hash is gone */ + claim_removed_entry = hlist_entry(claim_removed_node, + struct batadv_bla_claim, hash_entry); + batadv_claim_put(claim_removed_entry); + +free_claim: /* don't need the reference from hash_find() anymore */ batadv_claim_put(claim); } diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c @@ -1116,9 +1116,9 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, struct attribute *attr, char *buff, size_t count) { - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv; u32 tp_override; u32 old_tp_override; bool ret; @@ -1147,7 +1147,10 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, atomic_set(&hard_iface->bat_v.throughput_override, tp_override); - batadv_netlink_notify_hardif(bat_priv, hard_iface); + if (hard_iface->soft_iface) { + bat_priv = netdev_priv(hard_iface->soft_iface); + batadv_netlink_notify_hardif(bat_priv, hard_iface); + } out: batadv_hardif_put(hard_iface); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c @@ -616,14 +616,26 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global, const char *message) { + struct batadv_tt_global_entry *tt_removed_entry; + struct hlist_node *tt_removed_node; + batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, batadv_print_vid(tt_global->common.vid), message); - batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, - batadv_choose_tt, &tt_global->common); - batadv_tt_global_entry_put(tt_global); + tt_removed_node = batadv_hash_remove(bat_priv->tt.global_hash, + batadv_compare_tt, + batadv_choose_tt, + &tt_global->common); + if (!tt_removed_node) + return; + + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_global_entry, + common.hash_entry); + batadv_tt_global_entry_put(tt_removed_entry); } /** @@ -1337,9 +1349,10 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid, const char *message, bool roaming) { + struct batadv_tt_local_entry *tt_removed_entry; struct batadv_tt_local_entry *tt_local_entry; u16 flags, curr_flags = BATADV_NO_FLAGS; - void *tt_entry_exists; + struct hlist_node *tt_removed_node; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1368,15 +1381,18 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, */ batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); - tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash, + tt_removed_node = batadv_hash_remove(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_tt, &tt_local_entry->common); - if (!tt_entry_exists) + if (!tt_removed_node) goto out; - /* extra call to free the local tt entry */ - batadv_tt_local_entry_put(tt_local_entry); + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_local_entry, + common.hash_entry); + batadv_tt_local_entry_put(tt_removed_entry); out: if (tt_local_entry) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c @@ -601,6 +601,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, if (ipv4_is_local_multicast(group)) return 0; + memset(&br_group, 0, sizeof(br_group)); br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; @@ -1497,6 +1498,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, own_query = port ? &port->ip4_own_query : &br->ip4_own_query; + memset(&br_group, 0, sizeof(br_group)); br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; @@ -1520,6 +1522,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, own_query = port ? &port->ip6_own_query : &br->ip6_own_query; + memset(&br_group, 0, sizeof(br_group)); br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; diff --git a/net/core/datagram.c b/net/core/datagram.c @@ -279,7 +279,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (!skb_queue_empty(&sk->sk_receive_queue)); + } while (sk->sk_receive_queue.prev != *last); error = -EAGAIN; diff --git a/net/core/dev.c b/net/core/dev.c @@ -5014,8 +5014,10 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head, if (pt_prev->list_func != NULL) pt_prev->list_func(head, pt_prev, orig_dev); else - list_for_each_entry_safe(skb, next, head, list) + list_for_each_entry_safe(skb, next, head, list) { + skb_list_del_init(skb); pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + } } static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc) diff --git a/net/core/ethtool.c b/net/core/ethtool.c @@ -1797,11 +1797,16 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) WARN_ON_ONCE(!ret); gstrings.len = ret; - data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); - if (gstrings.len && !data) - return -ENOMEM; - __ethtool_get_strings(dev, gstrings.string_set, data); + if (gstrings.len) { + data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); + if (!data) + return -ENOMEM; + + __ethtool_get_strings(dev, gstrings.string_set, data); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) @@ -1897,11 +1902,15 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return -EFAULT; stats.n_stats = n_stats; - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (n_stats && !data) - return -ENOMEM; - ops->get_ethtool_stats(dev, &stats, data); + if (n_stats) { + data = vzalloc(array_size(n_stats, sizeof(u64))); + if (!data) + return -ENOMEM; + ops->get_ethtool_stats(dev, &stats, data); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &stats, sizeof(stats))) @@ -1941,16 +1950,21 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) return -EFAULT; stats.n_stats = n_stats; - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (n_stats && !data) - return -ENOMEM; - if (dev->phydev && !ops->get_ethtool_phy_stats) { - ret = phy_ethtool_get_stats(dev->phydev, &stats, data); - if (ret < 0) - return ret; + if (n_stats) { + data = vzalloc(array_size(n_stats, sizeof(u64))); + if (!data) + return -ENOMEM; + + if (dev->phydev && !ops->get_ethtool_phy_stats) { + ret = phy_ethtool_get_stats(dev->phydev, &stats, data); + if (ret < 0) + goto out; + } else { + ops->get_ethtool_phy_stats(dev, &stats, data); + } } else { - ops->get_ethtool_phy_stats(dev, &stats, data); + data = NULL; } ret = -EFAULT; diff --git a/net/core/filter.c b/net/core/filter.c @@ -6613,14 +6613,8 @@ static bool flow_dissector_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - if (type == BPF_WRITE) { - switch (off) { - case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): - break; - default: - return false; - } - } + if (type == BPF_WRITE) + return false; switch (off) { case bpf_ctx_range(struct __sk_buff, data): @@ -6632,11 +6626,7 @@ static bool flow_dissector_is_valid_access(int off, int size, case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): info->reg_type = PTR_TO_FLOW_KEYS; break; - case bpf_ctx_range(struct __sk_buff, tc_classid): - case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range_till(struct __sk_buff, family, local_port): - case bpf_ctx_range(struct __sk_buff, tstamp): - case bpf_ctx_range(struct __sk_buff, wire_len): + default: return false; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c @@ -707,6 +707,7 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, /* Pass parameters to the BPF program */ memset(flow_keys, 0, sizeof(*flow_keys)); cb->qdisc_cb.flow_keys = flow_keys; + flow_keys->n_proto = skb->protocol; flow_keys->nhoff = skb_network_offset(skb); flow_keys->thoff = flow_keys->nhoff; @@ -716,7 +717,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, /* Restore state */ memcpy(cb, &cb_saved, sizeof(cb_saved)); - flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len); + flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, + skb_network_offset(skb), skb->len); flow_keys->thoff = clamp_t(u16, flow_keys->thoff, flow_keys->nhoff, skb->len); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c @@ -304,6 +304,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) refcount_set(&net->count, 1); refcount_set(&net->passive, 1); + get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); diff --git a/net/core/skbuff.c b/net/core/skbuff.c @@ -3801,7 +3801,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) unsigned int delta_truesize; struct sk_buff *lp; - if (unlikely(p->len + len >= 65536)) + if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush)) return -E2BIG; lp = NAPI_GRO_CB(p)->last; diff --git a/net/dccp/feat.c b/net/dccp/feat.c @@ -738,7 +738,12 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len)) return -ENOMEM; - return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval); + if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) { + kfree(fval.sp.vec); + return -ENOMEM; + } + + return 0; } /** diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c @@ -98,8 +98,18 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, return skb; } +static int qca_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, + int *offset) +{ + *offset = QCA_HDR_LEN; + *proto = ((__be16 *)skb->data)[0]; + + return 0; +} + const struct dsa_device_ops qca_netdev_ops = { .xmit = qca_tag_xmit, .rcv = qca_tag_rcv, + .flow_dissect = qca_tag_flow_dissect, .overhead = QCA_HDR_LEN, }; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c @@ -257,11 +257,10 @@ int ip_local_deliver(struct sk_buff *skb) ip_local_deliver_finish); } -static inline bool ip_rcv_options(struct sk_buff *skb) +static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) { struct ip_options *opt; const struct iphdr *iph; - struct net_device *dev = skb->dev; /* It looks as overkill, because not all IP options require packet mangling. @@ -297,7 +296,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb) } } - if (ip_options_rcv_srr(skb)) + if (ip_options_rcv_srr(skb, dev)) goto drop; } @@ -353,7 +352,7 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, } #endif - if (iph->ihl > 5 && ip_rcv_options(skb)) + if (iph->ihl > 5 && ip_rcv_options(skb, dev)) goto drop; rt = skb_rtable(skb); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c @@ -612,7 +612,7 @@ void ip_forward_options(struct sk_buff *skb) } } -int ip_options_rcv_srr(struct sk_buff *skb) +int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev) { struct ip_options *opt = &(IPCB(skb)->opt); int srrspace, srrptr; @@ -647,7 +647,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) orefdst = skb->_skb_refdst; skb_dst_set(skb, NULL); - err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); + err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev); rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { skb_dst_drop(skb); diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c @@ -67,11 +67,6 @@ static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA; module_param(dctcp_alpha_on_init, uint, 0644); MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value"); -static unsigned int dctcp_clamp_alpha_on_loss __read_mostly; -module_param(dctcp_clamp_alpha_on_loss, uint, 0644); -MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss, - "parameter for clamping alpha on loss"); - static struct tcp_congestion_ops dctcp_reno; static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) @@ -164,21 +159,23 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) } } -static void dctcp_state(struct sock *sk, u8 new_state) +static void dctcp_react_to_loss(struct sock *sk) { - if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) { - struct dctcp *ca = inet_csk_ca(sk); + struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); - /* If this extension is enabled, we clamp dctcp_alpha to - * max on packet loss; the motivation is that dctcp_alpha - * is an indicator to the extend of congestion and packet - * loss is an indicator of extreme congestion; setting - * this in practice turned out to be beneficial, and - * effectively assumes total congestion which reduces the - * window by half. - */ - ca->dctcp_alpha = DCTCP_MAX_ALPHA; - } + ca->loss_cwnd = tp->snd_cwnd; + tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); +} + +static void dctcp_state(struct sock *sk, u8 new_state) +{ + if (new_state == TCP_CA_Recovery && + new_state != inet_csk(sk)->icsk_ca_state) + dctcp_react_to_loss(sk); + /* We handle RTO in dctcp_cwnd_event to ensure that we perform only + * one loss-adjustment per RTT. + */ } static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) @@ -190,6 +187,9 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) case CA_EVENT_ECN_NO_CE: dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); break; + case CA_EVENT_LOSS: + dctcp_react_to_loss(sk); + break; default: /* Don't care for the rest. */ break; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c @@ -2578,7 +2578,8 @@ static void __net_exit tcp_sk_exit(struct net *net) { int cpu; - module_put(net->ipv4.tcp_congestion_control->owner); + if (net->ipv4.tcp_congestion_control) + module_put(net->ipv4.tcp_congestion_control->owner); for_each_possible_cpu(cpu) inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c @@ -417,6 +417,7 @@ int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info) done: rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); return ret; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c @@ -601,7 +601,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; - unsigned int mtu, hlen, left, len; + unsigned int mtu, hlen, left, len, nexthdr_offset; int hroom, troom; __be32 frag_id; int ptr, offset = 0, err = 0; @@ -612,6 +612,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, goto fail; hlen = err; nexthdr = *prevhdr; + nexthdr_offset = prevhdr - skb_network_header(skb); mtu = ip6_skb_dst_mtu(skb); @@ -646,6 +647,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, (err = skb_checksum_help(skb))) goto fail; + prevhdr = skb_network_header(skb) + nexthdr_offset; hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { unsigned int first_len = skb_pagelen(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c @@ -627,7 +627,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->daddr, eiph->saddr, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0); - if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) { + if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) { if (!IS_ERR(rt)) ip_rt_put(rt); goto out; @@ -636,7 +636,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else { if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) + skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) goto out; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c @@ -669,6 +669,10 @@ static int ipip6_rcv(struct sk_buff *skb) !net_eq(tunnel->net, dev_net(tunnel->dev)))) goto out; + /* skb can be uncloned in iptunnel_pull_header, so + * old iph is no longer valid + */ + iph = (const struct iphdr *)skb_mac_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c @@ -2054,14 +2054,14 @@ static int __init kcm_init(void) if (err) goto fail; - err = sock_register(&kcm_family_ops); - if (err) - goto sock_register_fail; - err = register_pernet_device(&kcm_net_ops); if (err) goto net_ops_fail; + err = sock_register(&kcm_family_ops); + if (err) + goto sock_register_fail; + err = kcm_proc_init(); if (err) goto proc_init_fail; @@ -2069,12 +2069,12 @@ static int __init kcm_init(void) return 0; proc_init_fail: - unregister_pernet_device(&kcm_net_ops); - -net_ops_fail: sock_unregister(PF_KCM); sock_register_fail: + unregister_pernet_device(&kcm_net_ops); + +net_ops_fail: proto_unregister(&kcm_proto); fail: @@ -2090,8 +2090,8 @@ fail: static void __exit kcm_exit(void) { kcm_proc_exit(); - unregister_pernet_device(&kcm_net_ops); sock_unregister(PF_KCM); + unregister_pernet_device(&kcm_net_ops); proto_unregister(&kcm_proto); destroy_workqueue(kcm_wq); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c @@ -2306,14 +2306,14 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, struct sw_flow_actions *acts; int new_acts_size; - int req_size = NLA_ALIGN(attr_len); + size_t req_size = NLA_ALIGN(attr_len); int next_offset = offsetof(struct sw_flow_actions, actions) + (*sfa)->actions_len; if (req_size <= (ksize(*sfa) - next_offset)) goto out; - new_acts_size = ksize(*sfa) * 2; + new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); if (new_acts_size > MAX_ACTIONS_BUFSIZE) { if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { diff --git a/net/rds/tcp.c b/net/rds/tcp.c @@ -608,7 +608,7 @@ static void rds_tcp_kill_sock(struct net *net) list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); - if (net != c_net || !tc->t_sock) + if (net != c_net) continue; if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) { list_move_tail(&tc->t_tcp_node, &tmp_list); diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c @@ -45,8 +45,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; struct tcf_chain *goto_ch = NULL; + u32 psample_group_num, rate; struct tc_sample *parm; - u32 psample_group_num; struct tcf_sample *s; bool exists = false; int ret, err; @@ -85,6 +85,12 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (err < 0) goto release_idr; + rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + if (!rate) { + NL_SET_ERR_MSG(extack, "invalid sample rate"); + err = -EINVAL; + goto put_chain; + } psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]); psample_group = psample_group_get(net, psample_group_num); if (!psample_group) { @@ -96,7 +102,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, spin_lock_bh(&s->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); - s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + s->rate = rate; s->psample_group_num = psample_group_num; RCU_INIT_POINTER(s->psample_group, psample_group); diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c @@ -130,6 +130,11 @@ static void mall_destroy(struct tcf_proto *tp, bool rtnl_held, static void *mall_get(struct tcf_proto *tp, u32 handle) { + struct cls_mall_head *head = rtnl_dereference(tp->root); + + if (head && head->handle == handle) + return head; + return NULL; } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c @@ -1517,16 +1517,27 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) { + int wlen = skb_network_offset(skb); u8 dscp; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; if (wash && dscp) ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); return dscp; case htons(ETH_P_IPV6): + wlen += sizeof(struct ipv6hdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; if (wash && dscp) ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c @@ -1358,9 +1358,11 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; + __u32 qlen; cl->xstats.avgidle = cl->avgidle; cl->xstats.undertime = 0; + qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog); if (cl->undertime != PSCHED_PASTPERFECT) cl->xstats.undertime = cl->undertime - q->now; @@ -1368,7 +1370,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0) + gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); @@ -1665,17 +1667,13 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; - unsigned int qlen, backlog; if (cl->filters || cl->children || cl == &q->link) return -EBUSY; sch_tree_lock(sch); - qlen = cl->q->q.qlen; - backlog = cl->q->qstats.backlog; - qdisc_reset(cl->q); - qdisc_tree_reduce_backlog(cl->q, qlen, backlog); + qdisc_purge_queue(cl->q); if (cl->next_alive) cbq_deactivate_class(cl); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c @@ -50,15 +50,6 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct drr_class, common); } -static void drr_purge_queue(struct drr_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { [TCA_DRR_QUANTUM] = { .type = NLA_U32 }, }; @@ -167,7 +158,7 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - drr_purge_queue(cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); sch_tree_unlock(sch); @@ -269,7 +260,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct drr_class *cl = (struct drr_class *)arg; - __u32 qlen = cl->qdisc->q.qlen; + __u32 qlen = qdisc_qlen_sum(cl->qdisc); + struct Qdisc *cl_q = cl->qdisc; struct tc_drr_stats xstats; memset(&xstats, 0, sizeof(xstats)); @@ -279,7 +271,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0) + gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c @@ -845,16 +845,6 @@ qdisc_peek_len(struct Qdisc *sch) } static void -hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - -static void hfsc_adjust_levels(struct hfsc_class *cl) { struct hfsc_class *p; @@ -1076,7 +1066,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, qdisc_class_hash_insert(&q->clhash, &cl->cl_common); list_add_tail(&cl->siblings, &parent->children); if (parent->level == 0) - hfsc_purge_queue(sch, parent); + qdisc_purge_queue(parent->qdisc); hfsc_adjust_levels(parent); sch_tree_unlock(sch); @@ -1112,7 +1102,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) list_del(&cl->siblings); hfsc_adjust_levels(cl->cl_parent); - hfsc_purge_queue(sch, cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->cl_common); sch_tree_unlock(sch); @@ -1328,8 +1318,9 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct hfsc_class *cl = (struct hfsc_class *)arg; struct tc_hfsc_stats xstats; + __u32 qlen; - cl->qstats.backlog = cl->qdisc->qstats.backlog; + qdisc_qstats_qlen_backlog(cl->qdisc, &qlen, &cl->qstats.backlog); xstats.level = cl->level; xstats.period = cl->cl_vtperiod; xstats.work = cl->cl_total; @@ -1337,7 +1328,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0) + gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c @@ -1127,10 +1127,9 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) }; __u32 qlen = 0; - if (!cl->level && cl->leaf.q) { - qlen = cl->leaf.q->q.qlen; - qs.backlog = cl->leaf.q->qstats.backlog; - } + if (!cl->level && cl->leaf.q) + qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog); + cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens), INT_MIN, INT_MAX); cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens), @@ -1270,13 +1269,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - if (!cl->level) { - unsigned int qlen = cl->leaf.q->q.qlen; - unsigned int backlog = cl->leaf.q->qstats.backlog; - - qdisc_reset(cl->leaf.q); - qdisc_tree_reduce_backlog(cl->leaf.q, qlen, backlog); - } + if (!cl->level) + qdisc_purge_queue(cl->leaf.q); /* delete from hash and active; remainder in destroy_class */ qdisc_class_hash_remove(&q->clhash, &cl->common); @@ -1404,12 +1398,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, classid, NULL); sch_tree_lock(sch); if (parent && !parent->level) { - unsigned int qlen = parent->leaf.q->q.qlen; - unsigned int backlog = parent->leaf.q->qstats.backlog; - /* turn parent into inner node */ - qdisc_reset(parent->leaf.q); - qdisc_tree_reduce_backlog(parent->leaf.q, qlen, backlog); + qdisc_purge_queue(parent->leaf.q); qdisc_put(parent->leaf.q); if (parent->prio_activity) htb_deactivate(q, parent); diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c @@ -249,7 +249,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; return 0; } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c @@ -561,8 +561,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, - &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; } return 0; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c @@ -201,9 +201,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, for (i = q->bands; i < q->max_bands; i++) { if (q->queues[i] != &noop_qdisc) { struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; - qdisc_tree_reduce_backlog(child, child->q.qlen, - child->qstats.backlog); + qdisc_tree_flush_backlog(child); qdisc_put(child); } } @@ -225,9 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_hash_add(child, true); if (old != &noop_qdisc) { - qdisc_tree_reduce_backlog(old, - old->q.qlen, - old->qstats.backlog); + qdisc_tree_flush_backlog(old); qdisc_put(old); } sch_tree_unlock(sch); @@ -344,7 +342,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl_q->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) + qdisc_qstats_copy(d, cl_q) < 0) return -1; return 0; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c @@ -216,12 +216,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); - for (i = q->bands; i < oldbands; i++) { - struct Qdisc *child = q->queues[i]; - - qdisc_tree_reduce_backlog(child, child->q.qlen, - child->qstats.backlog); - } + for (i = q->bands; i < oldbands; i++) + qdisc_tree_flush_backlog(q->queues[i]); for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; @@ -365,7 +361,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl_q->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) + qdisc_qstats_copy(d, cl_q) < 0) return -1; return 0; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c @@ -217,15 +217,6 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct qfq_class, common); } -static void qfq_purge_queue(struct qfq_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, [TCA_QFQ_LMAX] = { .type = NLA_U32 }, @@ -551,7 +542,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - qfq_purge_queue(cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); sch_tree_unlock(sch); @@ -655,8 +646,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, - &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0) + qdisc_qstats_copy(d, cl->qdisc) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c @@ -233,8 +233,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, q->flags = ctl->flags; q->limit = ctl->limit; if (child) { - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); old_child = q->qdisc; q->qdisc = child; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c @@ -521,8 +521,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt, qdisc_hash_add(child, true); sch_tree_lock(sch); - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); qdisc_put(q->qdisc); q->qdisc = child; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c @@ -895,7 +895,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; return 0; } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c @@ -391,8 +391,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); qdisc_put(q->qdisc); q->qdisc = child; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c @@ -600,6 +600,7 @@ out: static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) { /* No address mapping for V4 sockets */ + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); return sizeof(struct sockaddr_in); } diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c @@ -267,8 +267,14 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, if (msg->rep_type) tipc_tlv_init(msg->rep, msg->rep_type); - if (cmd->header) - (*cmd->header)(msg); + if (cmd->header) { + err = (*cmd->header)(msg); + if (err) { + kfree_skb(msg->rep); + msg->rep = NULL; + return err; + } + } arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { @@ -397,7 +403,12 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_bearer_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(b->name, len)) return -EINVAL; @@ -766,7 +777,12 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_link_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(lc->name, len)) return -EINVAL; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c @@ -1484,6 +1484,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, return err; } + } else { + *zc = false; } rxm->full_len -= padding_length(ctx, tls_ctx, skb); diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile @@ -177,7 +177,7 @@ $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN) $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(VERSION) \ - -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so @ln -sf $(@F) $(OUTPUT)libbpf.so.$(VERSION) @@ -220,8 +220,9 @@ install_lib: all_cmd install_headers: $(call QUIET_INSTALL, headers) \ $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ - $(call do_install,libbpf.h,$(prefix)/include/bpf,644); - $(call do_install,btf.h,$(prefix)/include/bpf,644); + $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ + $(call do_install,btf.h,$(prefix)/include/bpf,644); \ + $(call do_install,xsk.h,$(prefix)/include/bpf,644); install: install_lib diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c @@ -2107,6 +2107,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return fwd_kind == real_kind; } + if (cand_kind != canon_kind) + return 0; + switch (cand_kind) { case BTF_KIND_INT: return btf_equal_int(cand_type, canon_type); diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -39,6 +39,58 @@ static struct bpf_flow_keys pkt_v6_flow_keys = { .n_proto = __bpf_constant_htons(ETH_P_IPV6), }; +#define VLAN_HLEN 4 + +static struct { + struct ethhdr eth; + __u16 vlan_tci; + __u16 vlan_proto; + struct iphdr iph; + struct tcphdr tcp; +} __packed pkt_vlan_v4 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_8021Q), + .vlan_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_TCP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .tcp.urg_ptr = 123, + .tcp.doff = 5, +}; + +static struct bpf_flow_keys pkt_vlan_v4_flow_keys = { + .nhoff = VLAN_HLEN, + .thoff = VLAN_HLEN + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), +}; + +static struct { + struct ethhdr eth; + __u16 vlan_tci; + __u16 vlan_proto; + __u16 vlan_tci2; + __u16 vlan_proto2; + struct ipv6hdr iph; + struct tcphdr tcp; +} __packed pkt_vlan_v6 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_8021AD), + .vlan_proto = __bpf_constant_htons(ETH_P_8021Q), + .vlan_proto2 = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .tcp.urg_ptr = 123, + .tcp.doff = 5, +}; + +static struct bpf_flow_keys pkt_vlan_v6_flow_keys = { + .nhoff = VLAN_HLEN * 2, + .thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), +}; + void test_flow_dissector(void) { struct bpf_flow_keys flow_keys; @@ -68,5 +120,21 @@ void test_flow_dissector(void) err, errno, retval, duration, size, sizeof(flow_keys)); CHECK_FLOW_KEYS("ipv6_flow_keys", flow_keys, pkt_v6_flow_keys); + err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v4, sizeof(pkt_vlan_v4), + &flow_keys, &size, &retval, &duration); + CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv4", + "err %d errno %d retval %d duration %d size %u/%lu\n", + err, errno, retval, duration, size, sizeof(flow_keys)); + CHECK_FLOW_KEYS("vlan_ipv4_flow_keys", flow_keys, + pkt_vlan_v4_flow_keys); + + err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v6, sizeof(pkt_vlan_v6), + &flow_keys, &size, &retval, &duration); + CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv6", + "err %d errno %d retval %d duration %d size %u/%lu\n", + err, errno, retval, duration, size, sizeof(flow_keys)); + CHECK_FLOW_KEYS("vlan_ipv6_flow_keys", flow_keys, + pkt_vlan_v6_flow_keys); + bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -92,7 +92,6 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) { struct bpf_flow_keys *keys = skb->flow_keys; - keys->n_proto = proto; switch (proto) { case bpf_htons(ETH_P_IP): bpf_tail_call(skb, &jmp_table, IP); @@ -119,10 +118,9 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) SEC("flow_dissector") int _dissect(struct __sk_buff *skb) { - if (!skb->vlan_present) - return parse_eth_proto(skb, skb->protocol); - else - return parse_eth_proto(skb, skb->vlan_proto); + struct bpf_flow_keys *keys = skb->flow_keys; + + return parse_eth_proto(skb, keys->n_proto); } /* Parses on IPPROTO_* */ @@ -336,15 +334,9 @@ PROG(VLAN)(struct __sk_buff *skb) { struct bpf_flow_keys *keys = skb->flow_keys; struct vlan_hdr *vlan, _vlan; - __be16 proto; - - /* Peek back to see if single or double-tagging */ - if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto, - sizeof(proto))) - return BPF_DROP; /* Account for double-tagging */ - if (proto == bpf_htons(ETH_P_8021AD)) { + if (keys->n_proto == bpf_htons(ETH_P_8021AD)) { vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); if (!vlan) return BPF_DROP; @@ -352,6 +344,7 @@ PROG(VLAN)(struct __sk_buff *skb) if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) return BPF_DROP; + keys->nhoff += sizeof(*vlan); keys->thoff += sizeof(*vlan); } @@ -359,12 +352,14 @@ PROG(VLAN)(struct __sk_buff *skb) if (!vlan) return BPF_DROP; + keys->nhoff += sizeof(*vlan); keys->thoff += sizeof(*vlan); /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) return BPF_DROP; + keys->n_proto = vlan->h_vlan_encapsulated_proto; return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); } diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c @@ -5777,6 +5777,53 @@ const struct btf_dedup_test dedup_tests[] = { }, }, { + .descr = "dedup: void equiv check", + /* + * // CU 1: + * struct s { + * struct {} *x; + * }; + * // CU 2: + * struct s { + * int *x; + * }; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */ + BTF_PTR_ENC(1), /* [2] ptr -> [1] */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + /* CU 2 */ + BTF_PTR_ENC(0), /* [4] ptr -> void */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 4, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0x"), + }, + .expect = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */ + BTF_PTR_ENC(1), /* [2] ptr -> [1] */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + /* CU 2 */ + BTF_PTR_ENC(0), /* [4] ptr -> void */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 4, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0x"), + }, + .opts = { + .dont_resolve_fwds = false, + .dedup_table_size = 1, /* force hash collisions */ + }, +}, +{ .descr = "dedup: all possible kinds (no duplicates)", .input = { .raw_types = { diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c @@ -908,6 +908,44 @@ .result = REJECT, }, { + "calls: stack depth check in dead code", + .insns = { + /* main */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */ + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), /* call B */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */ + BPF_EXIT_INSN(), + /* C */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */ + BPF_EXIT_INSN(), + /* D */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */ + BPF_EXIT_INSN(), + /* E */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */ + BPF_EXIT_INSN(), + /* F */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */ + BPF_EXIT_INSN(), + /* G */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ + BPF_EXIT_INSN(), + /* H */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "call stack", + .result = REJECT, +}, +{ "calls: spill into caller stack frame", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json @@ -144,6 +144,30 @@ ] }, { + "id": "7571", + "name": "Add sample action with invalid rate", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 0 group 1 index 2", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 2", + "matchPattern": "action order [0-9]+: sample rate 1/0 group 1.*index 2 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { "id": "b6d4", "name": "Add sample action with mandatory arguments and invalid control action", "category": [