whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 82aa4671516a3203261c835e98c3eecab10c994d
parent ffb845db50012eb3704a270efdf9b98be4e3454a
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu,  1 Nov 2018 09:16:01 -0700

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) BPF verifier fixes from Daniel Borkmann.

 2) HNS driver fixes from Huazhong Tan.

 3) FDB only works for ethernet devices, reject attempts to install FDB
    rules for others. From Ido Schimmel.

 4) Fix spectre V1 in vhost, from Jason Wang.

 5) Don't pass on-stack object to irq_set_affinity_hint() in mvpp2
    driver, from Marc Zyngier.

 6) Fix mlx5e checksum handling when RXFCS is enabled, from Eric
    Dumazet.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (49 commits)
  openvswitch: Fix push/pop ethernet validation
  net: stmmac: Fix stmmac_mdio_reset() when building stmmac as modules
  bpf: test make sure to run unpriv test cases in test_verifier
  bpf: add various test cases to test_verifier
  bpf: don't set id on after map lookup with ptr_to_map_val return
  bpf: fix partial copy of map_ptr when dst is scalar
  libbpf: Fix compile error in libbpf_attach_type_by_name
  kselftests/bpf: use ping6 as the default ipv6 ping binary if it exists
  selftests: mlxsw: qos_mc_aware: Add a test for UC awareness
  selftests: mlxsw: qos_mc_aware: Tweak for min shaper
  mlxsw: spectrum: Set minimum shaper on MC TCs
  mlxsw: reg: QEEC: Add minimum shaper fields
  net: hns3: bugfix for rtnl_lock's range in the hclgevf_reset()
  net: hns3: bugfix for rtnl_lock's range in the hclge_reset()
  net: hns3: bugfix for handling mailbox while the command queue reinitialized
  net: hns3: fix incorrect return value/type of some functions
  net: hns3: bugfix for hclge_mdio_write and hclge_mdio_read
  net: hns3: bugfix for is_valid_csq_clean_head()
  net: hns3: remove unnecessary queue reset in the hns3_uninit_all_ring()
  net: hns3: bugfix for the initialization of command queue's spin lock
  ...

Diffstat:
MDocumentation/networking/ip-sysctl.txt | 11+++++++++++
Mdrivers/net/bonding/bond_netlink.c | 3+--
Mdrivers/net/ethernet/hisilicon/hns3/hnae3.h | 6+++---
Mdrivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
Mdrivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 2+-
Mdrivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 26++++++++++++++++----------
Mdrivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 2+-
Mdrivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 42+++++++++++++++++++++---------------------
Mdrivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2+-
Mdrivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 6++++++
Mdrivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 4++--
Mdrivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 19++++++++++++-------
Mdrivers/net/ethernet/huawei/hinic/hinic_hw_qp.c | 2+-
Mdrivers/net/ethernet/huawei/hinic/hinic_hw_qp.h | 2+-
Mdrivers/net/ethernet/intel/Kconfig | 18++++++++++++++++++
Mdrivers/net/ethernet/intel/fm10k/fm10k_iov.c | 51++++++++++++++++++++++++++++++---------------------
Mdrivers/net/ethernet/intel/fm10k/fm10k_main.c | 2+-
Mdrivers/net/ethernet/intel/fm10k/fm10k_pci.c | 2++
Mdrivers/net/ethernet/intel/fm10k/fm10k_type.h | 2++
Mdrivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2+-
Mdrivers/net/ethernet/intel/igb/igb_ptp.c | 8+++++++-
Mdrivers/net/ethernet/intel/ixgbe/Makefile | 2+-
Mdrivers/net/ethernet/intel/ixgbe/ixgbe.h | 8++++----
Mdrivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6+++---
Mdrivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 4+++-
Mdrivers/net/ethernet/intel/ixgbevf/Makefile | 2+-
Mdrivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 4++--
Mdrivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2+-
Mdrivers/net/ethernet/marvell/mvpp2/mvpp2.h | 1+
Mdrivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 18++++++++++++++----
Mdrivers/net/ethernet/mellanox/mlx4/en_rx.c | 1+
Mdrivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 45+++++++++------------------------------------
Mdrivers/net/ethernet/mellanox/mlxsw/core.c | 24+++++++++++++++++-------
Mdrivers/net/ethernet/mellanox/mlxsw/reg.h | 22+++++++++++++++++++++-
Mdrivers/net/ethernet/mellanox/mlxsw/spectrum.c | 25+++++++++++++++++++++++++
Mdrivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2--
Mdrivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2+-
Mdrivers/vhost/vhost.c | 2++
Minclude/linux/avf/virtchnl.h | 12+++++++++---
Minclude/linux/bpf_verifier.h | 3+++
Minclude/linux/inetdevice.h | 4+++-
Minclude/net/af_unix.h | 4++--
Mkernel/bpf/verifier.c | 21++++++++++++---------
Mnet/core/rtnetlink.c | 10++++++++++
Mnet/ipv4/igmp.c | 53++++++++++++++++++++++++++++++++++++-----------------
Mnet/ipv4/tcp_bpf.c | 1+
Mnet/openvswitch/flow_netlink.c | 4++--
Mnet/sctp/associola.c | 10+++++++---
Mnet/sctp/socket.c | 8+++++---
Mnet/xfrm/Kconfig | 1-
Mtools/lib/bpf/libbpf.c | 13+++++++------
Mtools/testing/selftests/bpf/flow_dissector_load.c | 2++
Mtools/testing/selftests/bpf/test_skb_cgroup_id.sh | 3++-
Mtools/testing/selftests/bpf/test_sock_addr.sh | 3++-
Mtools/testing/selftests/bpf/test_verifier.c | 321+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Mtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh | 95+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
56 files changed, 793 insertions(+), 274 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt @@ -316,6 +316,17 @@ tcp_frto - INTEGER By default it's enabled with a non-zero value. 0 disables F-RTO. +tcp_fwmark_accept - BOOLEAN + If set, incoming connections to listening sockets that do not have a + socket mark will set the mark of the accepting socket to the fwmark of + the incoming SYN packet. This will cause all packets on that connection + (starting from the first SYNACK) to be sent with that fwmark. The + listening socket's mark is unchanged. Listening sockets that already + have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are + unaffected. + + Default: 0 + tcp_invalid_ratelimit - INTEGER Limit the maximal rate for sending duplicate acknowledgments in response to incoming TCP packets that are for an existing diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c @@ -638,8 +638,7 @@ static int bond_fill_info(struct sk_buff *skb, goto nla_put_failure; if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM, - sizeof(bond->params.ad_actor_system), - &bond->params.ad_actor_system)) + ETH_ALEN, &bond->params.ad_actor_system)) goto nla_put_failure; } if (!bond_3ad_get_active_agg_info(bond, &info)) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -316,8 +316,8 @@ struct hnae3_ae_ops { int (*set_loopback)(struct hnae3_handle *handle, enum hnae3_loop loop_mode, bool en); - void (*set_promisc_mode)(struct hnae3_handle *handle, bool en_uc_pmc, - bool en_mc_pmc); + int (*set_promisc_mode)(struct hnae3_handle *handle, bool en_uc_pmc, + bool en_mc_pmc); int (*set_mtu)(struct hnae3_handle *handle, int new_mtu); void (*get_pauseparam)(struct hnae3_handle *handle, @@ -391,7 +391,7 @@ struct hnae3_ae_ops { int vector_num, struct hnae3_ring_chain_node *vr_chain); - void (*reset_queue)(struct hnae3_handle *handle, u16 queue_id); + int (*reset_queue)(struct hnae3_handle *handle, u16 queue_id); u32 (*get_fw_version)(struct hnae3_handle *handle); void (*get_mdix_mode)(struct hnae3_handle *handle, u8 *tp_mdix_ctrl, u8 *tp_mdix); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -509,16 +509,18 @@ static void hns3_nic_set_rx_mode(struct net_device *netdev) h->netdev_flags = new_flags; } -void hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags) +int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = priv->ae_handle; if (h->ae_algo->ops->set_promisc_mode) { - h->ae_algo->ops->set_promisc_mode(h, - promisc_flags & HNAE3_UPE, - promisc_flags & HNAE3_MPE); + return h->ae_algo->ops->set_promisc_mode(h, + promisc_flags & HNAE3_UPE, + promisc_flags & HNAE3_MPE); } + + return 0; } void hns3_enable_vlan_filter(struct net_device *netdev, bool enable) @@ -1494,18 +1496,22 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev, return ret; } -static void hns3_restore_vlan(struct net_device *netdev) +static int hns3_restore_vlan(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); + int ret = 0; u16 vid; - int ret; for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) { ret = hns3_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid); - if (ret) - netdev_warn(netdev, "Restore vlan: %d filter, ret:%d\n", - vid, ret); + if (ret) { + netdev_err(netdev, "Restore vlan: %d filter, ret:%d\n", + vid, ret); + return ret; + } } + + return ret; } static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, @@ -2727,7 +2733,7 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector, chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL); if (!chain) - return -ENOMEM; + goto err_free_chain; cur_chain->next = chain; chain->tqp_index = tx_ring->tqp->tqp_index; @@ -2757,7 +2763,7 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector, while (rx_ring) { chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL); if (!chain) - return -ENOMEM; + goto err_free_chain; cur_chain->next = chain; chain->tqp_index = rx_ring->tqp->tqp_index; @@ -2772,6 +2778,16 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector, } return 0; + +err_free_chain: + cur_chain = head->next; + while (cur_chain) { + chain = cur_chain->next; + devm_kfree(&pdev->dev, chain); + cur_chain = chain; + } + + return -ENOMEM; } static void hns3_free_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector, @@ -2821,7 +2837,7 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv) struct hnae3_handle *h = priv->ae_handle; struct hns3_enet_tqp_vector *tqp_vector; int ret = 0; - u16 i; + int i; hns3_nic_set_cpumask(priv); @@ -2868,13 +2884,19 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv) hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain); if (ret) - return ret; + goto map_ring_fail; netif_napi_add(priv->netdev, &tqp_vector->napi, hns3_nic_common_poll, NAPI_POLL_WEIGHT); } return 0; + +map_ring_fail: + while (i--) + netif_napi_del(&priv->tqp_vector[i].napi); + + return ret; } static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv) @@ -3031,8 +3053,10 @@ static int hns3_queue_to_ring(struct hnae3_queue *tqp, return ret; ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX); - if (ret) + if (ret) { + devm_kfree(priv->dev, priv->ring_data[tqp->tqp_index].ring); return ret; + } return 0; } @@ -3059,6 +3083,12 @@ static int hns3_get_ring_config(struct hns3_nic_priv *priv) return 0; err: + while (i--) { + devm_kfree(priv->dev, priv->ring_data[i].ring); + devm_kfree(priv->dev, + priv->ring_data[i + h->kinfo.num_tqps].ring); + } + devm_kfree(&pdev->dev, priv->ring_data); return ret; } @@ -3226,9 +3256,6 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv) int i; for (i = 0; i < h->kinfo.num_tqps; i++) { - if (h->ae_algo->ops->reset_queue) - h->ae_algo->ops->reset_queue(h, i); - hns3_fini_ring(priv->ring_data[i].ring); hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring); } @@ -3236,11 +3263,12 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv) } /* Set mac addr if it is configured. or leave it to the AE driver */ -static void hns3_init_mac_addr(struct net_device *netdev, bool init) +static int hns3_init_mac_addr(struct net_device *netdev, bool init) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = priv->ae_handle; u8 mac_addr_temp[ETH_ALEN]; + int ret = 0; if (h->ae_algo->ops->get_mac_addr && init) { h->ae_algo->ops->get_mac_addr(h, mac_addr_temp); @@ -3255,8 +3283,9 @@ static void hns3_init_mac_addr(struct net_device *netdev, bool init) } if (h->ae_algo->ops->set_mac_addr) - h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true); + ret = h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true); + return ret; } static int hns3_restore_fd_rules(struct net_device *netdev) @@ -3469,20 +3498,29 @@ err_out: return ret; } -static void hns3_recover_hw_addr(struct net_device *ndev) +static int hns3_recover_hw_addr(struct net_device *ndev) { struct netdev_hw_addr_list *list; struct netdev_hw_addr *ha, *tmp; + int ret = 0; /* go through and sync uc_addr entries to the device */ list = &ndev->uc; - list_for_each_entry_safe(ha, tmp, &list->list, list) - hns3_nic_uc_sync(ndev, ha->addr); + list_for_each_entry_safe(ha, tmp, &list->list, list) { + ret = hns3_nic_uc_sync(ndev, ha->addr); + if (ret) + return ret; + } /* go through and sync mc_addr entries to the device */ list = &ndev->mc; - list_for_each_entry_safe(ha, tmp, &list->list, list) - hns3_nic_mc_sync(ndev, ha->addr); + list_for_each_entry_safe(ha, tmp, &list->list, list) { + ret = hns3_nic_mc_sync(ndev, ha->addr); + if (ret) + return ret; + } + + return ret; } static void hns3_remove_hw_addr(struct net_device *netdev) @@ -3609,7 +3647,10 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h) int ret; for (i = 0; i < h->kinfo.num_tqps; i++) { - h->ae_algo->ops->reset_queue(h, i); + ret = h->ae_algo->ops->reset_queue(h, i); + if (ret) + return ret; + hns3_init_ring_hw(priv->ring_data[i].ring); /* We need to clear tx ring here because self test will @@ -3701,18 +3742,30 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle) bool vlan_filter_enable; int ret; - hns3_init_mac_addr(netdev, false); - hns3_recover_hw_addr(netdev); - hns3_update_promisc_mode(netdev, handle->netdev_flags); + ret = hns3_init_mac_addr(netdev, false); + if (ret) + return ret; + + ret = hns3_recover_hw_addr(netdev); + if (ret) + return ret; + + ret = hns3_update_promisc_mode(netdev, handle->netdev_flags); + if (ret) + return ret; + vlan_filter_enable = netdev->flags & IFF_PROMISC ? false : true; hns3_enable_vlan_filter(netdev, vlan_filter_enable); - /* Hardware table is only clear when pf resets */ - if (!(handle->flags & HNAE3_SUPPORT_VF)) - hns3_restore_vlan(netdev); + if (!(handle->flags & HNAE3_SUPPORT_VF)) { + ret = hns3_restore_vlan(netdev); + return ret; + } - hns3_restore_fd_rules(netdev); + ret = hns3_restore_fd_rules(netdev); + if (ret) + return ret; /* Carrier off reporting is important to ethtool even BEFORE open */ netif_carrier_off(netdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -640,7 +640,7 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector, u32 rl_value); void hns3_enable_vlan_filter(struct net_device *netdev, bool enable); -void hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags); +int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags); #ifdef CONFIG_HNS3_DCB void hns3_dcbnl_setup(struct hnae3_handle *handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -24,15 +24,15 @@ static int hclge_ring_space(struct hclge_cmq_ring *ring) return ring->desc_num - used - 1; } -static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int h) +static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int head) { - int u = ring->next_to_use; - int c = ring->next_to_clean; + int ntu = ring->next_to_use; + int ntc = ring->next_to_clean; - if (unlikely(h >= ring->desc_num)) - return 0; + if (ntu > ntc) + return head >= ntc && head <= ntu; - return u > c ? (h > c && h <= u) : (h > c || h <= u); + return head >= ntc || head <= ntu; } static int hclge_alloc_cmd_desc(struct hclge_cmq_ring *ring) @@ -304,6 +304,10 @@ int hclge_cmd_queue_init(struct hclge_dev *hdev) { int ret; + /* Setup the lock for command queue */ + spin_lock_init(&hdev->hw.cmq.csq.lock); + spin_lock_init(&hdev->hw.cmq.crq.lock); + /* Setup the queue entries for use cmd queue */ hdev->hw.cmq.csq.desc_num = HCLGE_NIC_CMQ_DESC_NUM; hdev->hw.cmq.crq.desc_num = HCLGE_NIC_CMQ_DESC_NUM; @@ -337,18 +341,20 @@ int hclge_cmd_init(struct hclge_dev *hdev) u32 version; int ret; + spin_lock_bh(&hdev->hw.cmq.csq.lock); + spin_lock_bh(&hdev->hw.cmq.crq.lock); + hdev->hw.cmq.csq.next_to_clean = 0; hdev->hw.cmq.csq.next_to_use = 0; hdev->hw.cmq.crq.next_to_clean = 0; hdev->hw.cmq.crq.next_to_use = 0; - /* Setup the lock for command queue */ - spin_lock_init(&hdev->hw.cmq.csq.lock); - spin_lock_init(&hdev->hw.cmq.crq.lock); - hclge_cmd_init_regs(&hdev->hw); clear_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + spin_unlock_bh(&hdev->hw.cmq.crq.lock); + spin_unlock_bh(&hdev->hw.cmq.csq.lock); + ret = hclge_cmd_query_firmware_version(&hdev->hw, &version); if (ret) { dev_err(&hdev->pdev->dev, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -751,7 +751,7 @@ static void hclge_process_ncsi_error(struct hclge_dev *hdev, ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd, HCLGE_NCSI_INT_CLR, 0); if (ret) - dev_err(dev, "failed(=%d) to clear NCSI intrerrupt status\n", + dev_err(dev, "failed(=%d) to clear NCSI interrupt status\n", ret); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2236,7 +2236,7 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data) } /* clear the source of interrupt if it is not cause by reset */ - if (event_cause != HCLGE_VECTOR0_EVENT_RST) { + if (event_cause == HCLGE_VECTOR0_EVENT_MBX) { hclge_clear_event_cause(hdev, event_cause, clearval); hclge_enable_vector(&hdev->misc_vector, true); } @@ -2470,14 +2470,17 @@ static void hclge_reset(struct hclge_dev *hdev) handle = &hdev->vport[0].nic; rtnl_lock(); hclge_notify_client(hdev, HNAE3_DOWN_CLIENT); + rtnl_unlock(); if (!hclge_reset_wait(hdev)) { + rtnl_lock(); hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT); hclge_reset_ae_dev(hdev->ae_dev); hclge_notify_client(hdev, HNAE3_INIT_CLIENT); hclge_clear_reset_cause(hdev); } else { + rtnl_lock(); /* schedule again to check pending resets later */ set_bit(hdev->reset_type, &hdev->reset_pending); hclge_reset_task_schedule(hdev); @@ -3314,8 +3317,8 @@ void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc, param->vf_id = vport_id; } -static void hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, - bool en_mc_pmc) +static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, + bool en_mc_pmc) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; @@ -3323,7 +3326,7 @@ static void hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, hclge_promisc_param_init(&param, en_uc_pmc, en_mc_pmc, true, vport->vport_id); - hclge_cmd_set_promisc_mode(hdev, &param); + return hclge_cmd_set_promisc_mode(hdev, &param); } static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode) @@ -6107,31 +6110,28 @@ static u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, return tqp->index; } -void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) +int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; int reset_try_times = 0; int reset_status; u16 queue_gid; - int ret; - - if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) - return; + int ret = 0; queue_gid = hclge_covert_handle_qid_global(handle, queue_id); ret = hclge_tqp_enable(hdev, queue_id, 0, false); if (ret) { - dev_warn(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret); - return; + dev_err(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret); + return ret; } ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true); if (ret) { - dev_warn(&hdev->pdev->dev, - "Send reset tqp cmd fail, ret = %d\n", ret); - return; + dev_err(&hdev->pdev->dev, + "Send reset tqp cmd fail, ret = %d\n", ret); + return ret; } reset_try_times = 0; @@ -6144,16 +6144,16 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) } if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { - dev_warn(&hdev->pdev->dev, "Reset TQP fail\n"); - return; + dev_err(&hdev->pdev->dev, "Reset TQP fail\n"); + return ret; } ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false); - if (ret) { - dev_warn(&hdev->pdev->dev, - "Deassert the soft reset fail, ret = %d\n", ret); - return; - } + if (ret) + dev_err(&hdev->pdev->dev, + "Deassert the soft reset fail, ret = %d\n", ret); + + return ret; } void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -778,7 +778,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev); void hclge_rss_indir_init_cfg(struct hclge_dev *hdev); void hclge_mbx_handler(struct hclge_dev *hdev); -void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id); +int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id); void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id); int hclge_cfg_flowctrl(struct hclge_dev *hdev); int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -400,6 +400,12 @@ void hclge_mbx_handler(struct hclge_dev *hdev) /* handle all the mailbox requests in the queue */ while (!hclge_cmd_crq_empty(&hdev->hw)) { + if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) { + dev_warn(&hdev->pdev->dev, + "command queue needs re-initializing\n"); + return; + } + desc = &crq->desc[crq->next_to_use]; req = (struct hclge_mbx_vf_to_pf_cmd *)desc->data; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -52,7 +52,7 @@ static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum, struct hclge_desc desc; int ret; - if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) return 0; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, false); @@ -90,7 +90,7 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum) struct hclge_desc desc; int ret; - if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) return 0; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, true); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -925,12 +925,12 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev, return status; } -static void hclgevf_set_promisc_mode(struct hnae3_handle *handle, - bool en_uc_pmc, bool en_mc_pmc) +static int hclgevf_set_promisc_mode(struct hnae3_handle *handle, + bool en_uc_pmc, bool en_mc_pmc) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc); + return hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc); } static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, int tqp_id, @@ -1080,7 +1080,7 @@ static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) 1, false, NULL, 0); } -static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) +static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); u8 msg_data[2]; @@ -1091,10 +1091,10 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) /* disable vf queue before send queue reset msg to PF */ ret = hclgevf_tqp_enable(hdev, queue_id, 0, false); if (ret) - return; + return ret; - hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data, - 2, true, NULL, 0); + return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data, + 2, true, NULL, 0); } static int hclgevf_notify_client(struct hclgevf_dev *hdev, @@ -1170,6 +1170,8 @@ static int hclgevf_reset(struct hclgevf_dev *hdev) /* bring down the nic to stop any ongoing TX/RX */ hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT); + rtnl_unlock(); + /* check if VF could successfully fetch the hardware reset completion * status from the hardware */ @@ -1181,12 +1183,15 @@ static int hclgevf_reset(struct hclgevf_dev *hdev) ret); dev_warn(&hdev->pdev->dev, "VF reset failed, disabling VF!\n"); + rtnl_lock(); hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT); rtnl_unlock(); return ret; } + rtnl_lock(); + /* now, re-initialize the nic client and ae device*/ ret = hclgevf_reset_stack(hdev); if (ret) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c @@ -532,7 +532,7 @@ void hinic_task_set_inner_l3(struct hinic_sq_task *task, } void hinic_task_set_tunnel_l4(struct hinic_sq_task *task, - enum hinic_l4_offload_type l4_type, + enum hinic_l4_tunnel_type l4_type, u32 tunnel_len) { task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) | diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h @@ -160,7 +160,7 @@ void hinic_task_set_inner_l3(struct hinic_sq_task *task, u32 network_len); void hinic_task_set_tunnel_l4(struct hinic_sq_task *task, - enum hinic_l4_offload_type l4_type, + enum hinic_l4_tunnel_type l4_type, u32 tunnel_len); void hinic_set_cs_inner_l4(struct hinic_sq_task *task, diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig @@ -200,6 +200,15 @@ config IXGBE_DCB If unsure, say N. +config IXGBE_IPSEC + bool "IPSec XFRM cryptography-offload acceleration" + depends on IXGBE + depends on XFRM_OFFLOAD + default y + select XFRM_ALGO + ---help--- + Enable support for IPSec offload in ixgbe.ko + config IXGBEVF tristate "Intel(R) 10GbE PCI Express Virtual Function Ethernet support" depends on PCI_MSI @@ -217,6 +226,15 @@ config IXGBEVF will be called ixgbevf. MSI-X interrupt support is required for this driver to work correctly. +config IXGBEVF_IPSEC + bool "IPSec XFRM cryptography-offload acceleration" + depends on IXGBEVF + depends on XFRM_OFFLOAD + default y + select XFRM_ALGO + ---help--- + Enable support for IPSec offload in ixgbevf.ko + config I40E tristate "Intel(R) Ethernet Controller XL710 Family support" imply PTP_1588_CLOCK diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -244,7 +244,8 @@ process_mbx: } /* guarantee we have free space in the SM mailbox */ - if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) { + if (hw->mbx.state == FM10K_STATE_OPEN && + !hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) { /* keep track of how many times this occurs */ interface->hw_sm_mbx_full++; @@ -302,6 +303,28 @@ void fm10k_iov_suspend(struct pci_dev *pdev) } } +static void fm10k_mask_aer_comp_abort(struct pci_dev *pdev) +{ + u32 err_mask; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); + if (!pos) + return; + + /* Mask the completion abort bit in the ERR_UNCOR_MASK register, + * preventing the device from reporting these errors to the upstream + * PCIe root device. This avoids bringing down platforms which upgrade + * non-fatal completer aborts into machine check exceptions. Completer + * aborts can occur whenever a VF reads a queue it doesn't own. + */ + pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, &err_mask); + err_mask |= PCI_ERR_UNC_COMP_ABORT; + pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, err_mask); + + mmiowb(); +} + int fm10k_iov_resume(struct pci_dev *pdev) { struct fm10k_intfc *interface = pci_get_drvdata(pdev); @@ -317,6 +340,12 @@ int fm10k_iov_resume(struct pci_dev *pdev) if (!iov_data) return -ENOMEM; + /* Lower severity of completer abort error reporting as + * the VFs can trigger this any time they read a queue + * that they don't own. + */ + fm10k_mask_aer_comp_abort(pdev); + /* allocate hardware resources for the VFs */ hw->iov.ops.assign_resources(hw, num_vfs, num_vfs); @@ -460,20 +489,6 @@ void fm10k_iov_disable(struct pci_dev *pdev) fm10k_iov_free_data(pdev); } -static void fm10k_disable_aer_comp_abort(struct pci_dev *pdev) -{ - u32 err_sev; - int pos; - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); - if (!pos) - return; - - pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, &err_sev); - err_sev &= ~PCI_ERR_UNC_COMP_ABORT; - pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, err_sev); -} - int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs) { int current_vfs = pci_num_vf(pdev); @@ -495,12 +510,6 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs) /* allocate VFs if not already allocated */ if (num_vfs && num_vfs != current_vfs) { - /* Disable completer abort error reporting as - * the VFs can trigger this any time they read a queue - * that they don't own. - */ - fm10k_disable_aer_comp_abort(pdev); - err = pci_enable_sriov(pdev, num_vfs); if (err) { dev_err(&pdev->dev, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -11,7 +11,7 @@ #include "fm10k.h" -#define DRV_VERSION "0.23.4-k" +#define DRV_VERSION "0.26.1-k" #define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver" const char fm10k_driver_version[] = DRV_VERSION; char fm10k_driver_name[] = "fm10k"; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -23,6 +23,8 @@ static const struct fm10k_info *fm10k_info_tbl[] = { */ static const struct pci_device_id fm10k_pci_tbl[] = { { PCI_VDEVICE(INTEL, FM10K_DEV_ID_PF), fm10k_device_pf }, + { PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2), fm10k_device_pf }, + { PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_DA2), fm10k_device_pf }, { PCI_VDEVICE(INTEL, FM10K_DEV_ID_VF), fm10k_device_vf }, /* required last entry */ { 0, } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h @@ -15,6 +15,8 @@ struct fm10k_hw; #define FM10K_DEV_ID_PF 0x15A4 #define FM10K_DEV_ID_VF 0x15A5 +#define FM10K_DEV_ID_SDI_FM10420_QDA2 0x15D0 +#define FM10K_DEV_ID_SDI_FM10420_DA2 0x15D5 #define FM10K_MAX_QUEUES 256 #define FM10K_MAX_QUEUES_PF 128 diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3674,7 +3674,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d\n", local_vf_id, v_opcode, msglen); switch (ret) { - case VIRTCHNL_ERR_PARAM: + case VIRTCHNL_STATUS_ERR_PARAM: return -EPERM; default: return -EINVAL; diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -51,9 +51,15 @@ * * The 40 bit 82580 SYSTIM overflows every * 2^40 * 10^-9 / 60 = 18.3 minutes. + * + * SYSTIM is converted to real time using a timecounter. As + * timecounter_cyc2time() allows old timestamps, the timecounter + * needs to be updated at least once per half of the SYSTIM interval. + * Scheduling of delayed work is not very accurate, so we aim for 8 + * minutes to be sure the actual interval is shorter than 9.16 minutes. */ -#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) +#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 8) #define IGB_PTP_TX_TIMEOUT (HZ * 15) #define INCPERIOD_82576 BIT(E1000_TIMINCA_16NS_SHIFT) #define INCVALUE_82576_MASK GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0) diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile @@ -17,4 +17,4 @@ ixgbe-$(CONFIG_IXGBE_DCB) += ixgbe_dcb.o ixgbe_dcb_82598.o \ ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o ixgbe-$(CONFIG_DEBUG_FS) += ixgbe_debugfs.o ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o -ixgbe-$(CONFIG_XFRM_OFFLOAD) += ixgbe_ipsec.o +ixgbe-$(CONFIG_IXGBE_IPSEC) += ixgbe_ipsec.o diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -769,9 +769,9 @@ struct ixgbe_adapter { #define IXGBE_RSS_KEY_SIZE 40 /* size of RSS Hash Key in bytes */ u32 *rss_key; -#ifdef CONFIG_XFRM_OFFLOAD +#ifdef CONFIG_IXGBE_IPSEC struct ixgbe_ipsec *ipsec; -#endif /* CONFIG_XFRM_OFFLOAD */ +#endif /* CONFIG_IXGBE_IPSEC */ /* AF_XDP zero-copy */ struct xdp_umem **xsk_umems; @@ -1008,7 +1008,7 @@ void ixgbe_store_key(struct ixgbe_adapter *adapter); void ixgbe_store_reta(struct ixgbe_adapter *adapter); s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm); -#ifdef CONFIG_XFRM_OFFLOAD +#ifdef CONFIG_IXGBE_IPSEC void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter); void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter); void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter); @@ -1036,5 +1036,5 @@ static inline int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *mbuf, u32 vf) { return -EACCES; } static inline int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *mbuf, u32 vf) { return -EACCES; } -#endif /* CONFIG_XFRM_OFFLOAD */ +#endif /* CONFIG_IXGBE_IPSEC */ #endif /* _IXGBE_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8694,7 +8694,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, #endif /* IXGBE_FCOE */ -#ifdef CONFIG_XFRM_OFFLOAD +#ifdef CONFIG_IXGBE_IPSEC if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx)) goto out_drop; #endif @@ -10190,7 +10190,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, * the TSO, so it's the exception. */ if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) { -#ifdef CONFIG_XFRM_OFFLOAD +#ifdef CONFIG_IXGBE_IPSEC if (!skb->sp) #endif features &= ~NETIF_F_TSO; @@ -10883,7 +10883,7 @@ skip_sriov: if (hw->mac.type >= ixgbe_mac_82599EB) netdev->features |= NETIF_F_SCTP_CRC; -#ifdef CONFIG_XFRM_OFFLOAD +#ifdef CONFIG_IXGBE_IPSEC #define IXGBE_ESP_FEATURES (NETIF_F_HW_ESP | \ NETIF_F_HW_ESP_TX_CSUM | \ NETIF_F_GSO_ESP) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -722,8 +722,10 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) ixgbe_set_vmvir(adapter, vfinfo->pf_vlan, adapter->default_up, vf); - if (vfinfo->spoofchk_enabled) + if (vfinfo->spoofchk_enabled) { hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf); + hw->mac.ops.set_mac_anti_spoofing(hw, true, vf); + } } /* reset multicast table array for vf */ diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile @@ -10,5 +10,5 @@ ixgbevf-objs := vf.o \ mbx.o \ ethtool.o \ ixgbevf_main.o -ixgbevf-$(CONFIG_XFRM_OFFLOAD) += ipsec.o +ixgbevf-$(CONFIG_IXGBEVF_IPSEC) += ipsec.o diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -459,7 +459,7 @@ int ethtool_ioctl(struct ifreq *ifr); extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector); -#ifdef CONFIG_XFRM_OFFLOAD +#ifdef CONFIG_IXGBEVF_IPSEC void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter); void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter); void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter); @@ -482,7 +482,7 @@ static inline int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring, struct ixgbevf_tx_buffer *first, struct ixgbevf_ipsec_tx_data *itd) { return 0; } -#endif /* CONFIG_XFRM_OFFLOAD */ +#endif /* CONFIG_IXGBEVF_IPSEC */ void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter); void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4150,7 +4150,7 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb, first->tx_flags = tx_flags; first->protocol = vlan_get_protocol(skb); -#ifdef CONFIG_XFRM_OFFLOAD +#ifdef CONFIG_IXGBEVF_IPSEC if (skb->sp && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx)) goto out_drop; #endif diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -796,6 +796,7 @@ struct mvpp2_queue_vector { int nrxqs; u32 pending_cause_rx; struct mvpp2_port *port; + struct cpumask *mask; }; struct mvpp2_port { diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3298,24 +3298,30 @@ static int mvpp2_irqs_init(struct mvpp2_port *port) for (i = 0; i < port->nqvecs; i++) { struct mvpp2_queue_vector *qv = port->qvecs + i; - if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) + if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) { + qv->mask = kzalloc(cpumask_size(), GFP_KERNEL); + if (!qv->mask) { + err = -ENOMEM; + goto err; + } + irq_set_status_flags(qv->irq, IRQ_NO_BALANCING); + } err = request_irq(qv->irq, mvpp2_isr, 0, port->dev->name, qv); if (err) goto err; if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) { - unsigned long mask = 0; unsigned int cpu; for_each_present_cpu(cpu) { if (mvpp2_cpu_to_thread(port->priv, cpu) == qv->sw_thread_id) - mask |= BIT(cpu); + cpumask_set_cpu(cpu, qv->mask); } - irq_set_affinity_hint(qv->irq, to_cpumask(&mask)); + irq_set_affinity_hint(qv->irq, qv->mask); } } @@ -3325,6 +3331,8 @@ err: struct mvpp2_queue_vector *qv = port->qvecs + i; irq_set_affinity_hint(qv->irq, NULL); + kfree(qv->mask); + qv->mask = NULL; free_irq(qv->irq, qv); } @@ -3339,6 +3347,8 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port) struct mvpp2_queue_vector *qv = port->qvecs + i; irq_set_affinity_hint(qv->irq, NULL); + kfree(qv->mask); + qv->mask = NULL; irq_clear_status_flags(qv->irq, IRQ_NO_BALANCING); free_irq(qv->irq, qv); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -43,6 +43,7 @@ #include <linux/vmalloc.h> #include <linux/irq.h> +#include <net/ip.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ip6_checksum.h> #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -713,43 +713,15 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) rq->stats->ecn_mark += !!rc; } -static __be32 mlx5e_get_fcs(struct sk_buff *skb) +static u32 mlx5e_get_fcs(const struct sk_buff *skb) { - int last_frag_sz, bytes_in_prev, nr_frags; - u8 *fcs_p1, *fcs_p2; - skb_frag_t *last_frag; - __be32 fcs_bytes; + const void *fcs_bytes; + u32 _fcs_bytes; - if (!skb_is_nonlinear(skb)) - return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN); + fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN, + ETH_FCS_LEN, &_fcs_bytes); - nr_frags = skb_shinfo(skb)->nr_frags; - last_frag = &skb_shinfo(skb)->frags[nr_frags - 1]; - last_frag_sz = skb_frag_size(last_frag); - - /* If all FCS data is in last frag */ - if (last_frag_sz >= ETH_FCS_LEN) - return *(__be32 *)(skb_frag_address(last_frag) + - last_frag_sz - ETH_FCS_LEN); - - fcs_p2 = (u8 *)skb_frag_address(last_frag); - bytes_in_prev = ETH_FCS_LEN - last_frag_sz; - - /* Find where the other part of the FCS is - Linear or another frag */ - if (nr_frags == 1) { - fcs_p1 = skb_tail_pointer(skb); - } else { - skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2]; - - fcs_p1 = skb_frag_address(prev_frag) + - skb_frag_size(prev_frag); - } - fcs_p1 -= bytes_in_prev; - - memcpy(&fcs_bytes, fcs_p1, bytes_in_prev); - memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz); - - return fcs_bytes; + return __get_unaligned_cpu32(fcs_bytes); } static u8 get_ip_proto(struct sk_buff *skb, __be16 proto) @@ -797,8 +769,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, network_depth - ETH_HLEN, skb->csum); if (unlikely(netdev->features & NETIF_F_RXFCS)) - skb->csum = csum_add(skb->csum, - (__force __wsum)mlx5e_get_fcs(skb)); + skb->csum = csum_block_add(skb->csum, + (__force __wsum)mlx5e_get_fcs(skb), + skb->len - ETH_FCS_LEN); stats->csum_complete++; return; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -943,8 +943,8 @@ static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink, mlxsw_core->bus, mlxsw_core->bus_priv, true, devlink); - if (err) - mlxsw_core->reload_fail = true; + mlxsw_core->reload_fail = !!err; + return err; } @@ -1083,8 +1083,15 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, { struct devlink *devlink = priv_to_devlink(mlxsw_core); - if (mlxsw_core->reload_fail) - goto reload_fail; + if (mlxsw_core->reload_fail) { + if (!reload) + /* Only the parts that were not de-initialized in the + * failed reload attempt need to be de-initialized. + */ + goto reload_fail_deinit; + else + return; + } if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); @@ -1098,9 +1105,12 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, if (!reload) devlink_resources_unregister(devlink, NULL); mlxsw_core->bus->fini(mlxsw_core->bus_priv); - if (reload) - return; -reload_fail: + + return; + +reload_fail_deinit: + devlink_unregister(devlink); + devlink_resources_unregister(devlink, NULL); devlink_free(devlink); } EXPORT_SYMBOL(mlxsw_core_bus_device_unregister); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3284,7 +3284,7 @@ static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port, * Configures the ETS elements. */ #define MLXSW_REG_QEEC_ID 0x400D -#define MLXSW_REG_QEEC_LEN 0x1C +#define MLXSW_REG_QEEC_LEN 0x20 MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN); @@ -3326,6 +3326,15 @@ MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8); */ MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8); +/* reg_qeec_mise + * Min shaper configuration enable. Enables configuration of the min + * shaper on this ETS element + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1); + enum { MLXSW_REG_QEEC_BYTES_MODE, MLXSW_REG_QEEC_PACKETS_MODE, @@ -3342,6 +3351,17 @@ enum { */ MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1); +/* The smallest permitted min shaper rate. */ +#define MLXSW_REG_QEEC_MIS_MIN 200000 /* Kbps */ + +/* reg_qeec_min_shaper_rate + * Min shaper information rate. + * For CPU port, can only be configured for port hierarchy. + * When in bytes mode, value is specified in units of 1000bps. + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28); + /* reg_qeec_mase * Max shaper configuration enable. Enables configuration of the max * shaper on this ETS element. diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2740,6 +2740,21 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); } +static int mlxsw_sp_port_min_bw_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index, u32 minrate) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index, + next_index); + mlxsw_reg_qeec_mise_set(qeec_pl, true); + mlxsw_reg_qeec_min_shaper_rate_set(qeec_pl, minrate); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 switch_prio, u8 tclass) { @@ -2817,6 +2832,16 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) return err; } + /* Configure the min shaper for multicast TCs. */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_TC, + i + 8, i, + MLXSW_REG_QEEC_MIS_MIN); + if (err) + return err; + } + /* Map all priorities to traffic class 0. */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2661,8 +2661,6 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) break; case SWITCHDEV_FDB_DEL_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) - break; mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, false); break; case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -243,7 +243,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, */ int stmmac_mdio_reset(struct mii_bus *bus) { -#if defined(CONFIG_STMMAC_PLATFORM) +#if IS_ENABLED(CONFIG_STMMAC_PLATFORM) struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c @@ -30,6 +30,7 @@ #include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/interval_tree_generic.h> +#include <linux/nospec.h> #include "vhost.h" @@ -1387,6 +1388,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg if (idx >= d->nvqs) return -ENOBUFS; + idx = array_index_nospec(idx, d->nvqs); vq = d->vqs[idx]; mutex_lock(&vq->mutex); diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h @@ -62,13 +62,19 @@ /* Error Codes */ enum virtchnl_status_code { VIRTCHNL_STATUS_SUCCESS = 0, - VIRTCHNL_ERR_PARAM = -5, + VIRTCHNL_STATUS_ERR_PARAM = -5, + VIRTCHNL_STATUS_ERR_NO_MEMORY = -18, VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH = -38, VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR = -39, VIRTCHNL_STATUS_ERR_INVALID_VF_ID = -40, - VIRTCHNL_STATUS_NOT_SUPPORTED = -64, + VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR = -53, + VIRTCHNL_STATUS_ERR_NOT_SUPPORTED = -64, }; +/* Backward compatibility */ +#define VIRTCHNL_ERR_PARAM VIRTCHNL_STATUS_ERR_PARAM +#define VIRTCHNL_STATUS_NOT_SUPPORTED VIRTCHNL_STATUS_ERR_NOT_SUPPORTED + #define VIRTCHNL_LINK_SPEED_100MB_SHIFT 0x1 #define VIRTCHNL_LINK_SPEED_1000MB_SHIFT 0x2 #define VIRTCHNL_LINK_SPEED_10GB_SHIFT 0x3 @@ -831,7 +837,7 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: default: - return VIRTCHNL_ERR_PARAM; + return VIRTCHNL_STATUS_ERR_PARAM; } /* few more checks */ if (err_msg_format || valid_len != msglen) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h @@ -51,6 +51,9 @@ struct bpf_reg_state { * PTR_TO_MAP_VALUE_OR_NULL */ struct bpf_map *map_ptr; + + /* Max size from any of the above. */ + unsigned long raw; }; /* Fixed part of pointer offset, pointer types only */ s32 off; diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h @@ -37,7 +37,9 @@ struct in_device { unsigned long mr_v1_seen; unsigned long mr_v2_seen; unsigned long mr_maxdelay; - unsigned char mr_qrv; + unsigned long mr_qi; /* Query Interval */ + unsigned long mr_qri; /* Query Response Interval */ + unsigned char mr_qrv; /* Query Robustness Variable */ unsigned char mr_gq_running; unsigned char mr_ifc_count; struct timer_list mr_gq_timer; /* general query timer */ diff --git a/include/net/af_unix.h b/include/net/af_unix.h @@ -13,7 +13,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); -struct sock *unix_peer_get(struct sock *); +struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_SIZE 256 #define UNIX_HASH_BITS 8 @@ -40,7 +40,7 @@ struct unix_skb_parms { u32 consumed; } __randomize_layout; -#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) +#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c @@ -2852,10 +2852,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].type = NOT_INIT; } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || fn->ret_type == RET_PTR_TO_MAP_VALUE) { - if (fn->ret_type == RET_PTR_TO_MAP_VALUE) - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; - else - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; /* There is no offset yet applied, variable or fixed */ mark_reg_known_zero(env, regs, BPF_REG_0); /* remember map_ptr, so that check_map_access() @@ -2868,7 +2864,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return -EINVAL; } regs[BPF_REG_0].map_ptr = meta.map_ptr; - regs[BPF_REG_0].id = ++env->id_gen; + if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; + } else { + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; + regs[BPF_REG_0].id = ++env->id_gen; + } } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { int id = acquire_reference_state(env, insn_idx); if (id < 0) @@ -3046,7 +3047,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg->umax_value = umax_ptr; dst_reg->var_off = ptr_reg->var_off; dst_reg->off = ptr_reg->off + smin_val; - dst_reg->range = ptr_reg->range; + dst_reg->raw = ptr_reg->raw; break; } /* A new variable offset is created. Note that off_reg->off @@ -3076,10 +3077,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); dst_reg->off = ptr_reg->off; + dst_reg->raw = ptr_reg->raw; if (reg_is_pkt_pointer(ptr_reg)) { dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range to zero */ - dst_reg->range = 0; + dst_reg->raw = 0; } break; case BPF_SUB: @@ -3108,7 +3110,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg->var_off = ptr_reg->var_off; dst_reg->id = ptr_reg->id; dst_reg->off = ptr_reg->off - smin_val; - dst_reg->range = ptr_reg->range; + dst_reg->raw = ptr_reg->raw; break; } /* A new variable offset is created. If the subtrahend is known @@ -3134,11 +3136,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); dst_reg->off = ptr_reg->off; + dst_reg->raw = ptr_reg->raw; if (reg_is_pkt_pointer(ptr_reg)) { dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range to zero */ if (smin_val < 0) - dst_reg->range = 0; + dst_reg->raw = 0; } break; case BPF_AND: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c @@ -3600,6 +3600,11 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } + if (dev->type != ARPHRD_ETHER) { + NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); @@ -3704,6 +3709,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } + if (dev->type != ARPHRD_ETHER) { + NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c @@ -111,13 +111,10 @@ #ifdef CONFIG_IP_MULTICAST /* Parameter names and values are taken from igmp-v2-06 draft */ -#define IGMP_V1_ROUTER_PRESENT_TIMEOUT (400*HZ) -#define IGMP_V2_ROUTER_PRESENT_TIMEOUT (400*HZ) #define IGMP_V2_UNSOLICITED_REPORT_INTERVAL (10*HZ) #define IGMP_V3_UNSOLICITED_REPORT_INTERVAL (1*HZ) +#define IGMP_QUERY_INTERVAL (125*HZ) #define IGMP_QUERY_RESPONSE_INTERVAL (10*HZ) -#define IGMP_QUERY_ROBUSTNESS_VARIABLE 2 - #define IGMP_INITIAL_REPORT_DELAY (1) @@ -935,13 +932,15 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, max_delay = IGMP_QUERY_RESPONSE_INTERVAL; in_dev->mr_v1_seen = jiffies + - IGMP_V1_ROUTER_PRESENT_TIMEOUT; + (in_dev->mr_qrv * in_dev->mr_qi) + + in_dev->mr_qri; group = 0; } else { /* v2 router present */ max_delay = ih->code*(HZ/IGMP_TIMER_SCALE); in_dev->mr_v2_seen = jiffies + - IGMP_V2_ROUTER_PRESENT_TIMEOUT; + (in_dev->mr_qrv * in_dev->mr_qi) + + in_dev->mr_qri; } /* cancel the interface change timer */ in_dev->mr_ifc_count = 0; @@ -981,8 +980,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (!max_delay) max_delay = 1; /* can't mod w/ 0 */ in_dev->mr_maxdelay = max_delay; - if (ih3->qrv) - in_dev->mr_qrv = ih3->qrv; + + /* RFC3376, 4.1.6. QRV and 4.1.7. QQIC, when the most recently + * received value was zero, use the default or statically + * configured value. + */ + in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; + + /* RFC3376, 8.3. Query Response Interval: + * The number of seconds represented by the [Query Response + * Interval] must be less than the [Query Interval]. + */ + if (in_dev->mr_qri >= in_dev->mr_qi) + in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ; + if (!group) { /* general query */ if (ih3->nsrcs) return true; /* no sources allowed */ @@ -1723,18 +1735,30 @@ void ip_mc_down(struct in_device *in_dev) ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS); } -void ip_mc_init_dev(struct in_device *in_dev) -{ #ifdef CONFIG_IP_MULTICAST +static void ip_mc_reset(struct in_device *in_dev) +{ struct net *net = dev_net(in_dev->dev); + + in_dev->mr_qi = IGMP_QUERY_INTERVAL; + in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL; + in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; +} +#else +static void ip_mc_reset(struct in_device *in_dev) +{ +} #endif + +void ip_mc_init_dev(struct in_device *in_dev) +{ ASSERT_RTNL(); #ifdef CONFIG_IP_MULTICAST timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0); timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0); - in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; #endif + ip_mc_reset(in_dev); spin_lock_init(&in_dev->mc_tomb_lock); } @@ -1744,15 +1768,10 @@ void ip_mc_init_dev(struct in_device *in_dev) void ip_mc_up(struct in_device *in_dev) { struct ip_mc_list *pmc; -#ifdef CONFIG_IP_MULTICAST - struct net *net = dev_net(in_dev->dev); -#endif ASSERT_RTNL(); -#ifdef CONFIG_IP_MULTICAST - in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; -#endif + ip_mc_reset(in_dev); ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for_each_pmc_rtnl(in_dev, pmc) { diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c @@ -145,6 +145,7 @@ msg_bytes_ready: ret = err; goto out; } + copied = -EAGAIN; } ret = copied; out: diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c @@ -3030,7 +3030,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, * is already present */ if (mac_proto != MAC_PROTO_NONE) return -EINVAL; - mac_proto = MAC_PROTO_NONE; + mac_proto = MAC_PROTO_ETHERNET; break; case OVS_ACTION_ATTR_POP_ETH: @@ -3038,7 +3038,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return -EINVAL; if (vlan_tci & htons(VLAN_TAG_PRESENT)) return -EINVAL; - mac_proto = MAC_PROTO_ETHERNET; + mac_proto = MAC_PROTO_NONE; break; case OVS_ACTION_ATTR_PUSH_NSH: diff --git a/net/sctp/associola.c b/net/sctp/associola.c @@ -499,8 +499,9 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, void sctp_assoc_rm_peer(struct sctp_association *asoc, struct sctp_transport *peer) { - struct list_head *pos; - struct sctp_transport *transport; + struct sctp_transport *transport; + struct list_head *pos; + struct sctp_chunk *ch; pr_debug("%s: association:%p addr:%pISpc\n", __func__, asoc, &peer->ipaddr.sa); @@ -564,7 +565,6 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, */ if (!list_empty(&peer->transmitted)) { struct sctp_transport *active = asoc->peer.active_path; - struct sctp_chunk *ch; /* Reset the transport of each chunk on this list */ list_for_each_entry(ch, &peer->transmitted, @@ -586,6 +586,10 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, sctp_transport_hold(active); } + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) + if (ch->transport == peer) + ch->transport = NULL; + asoc->peer.transport_count--; sctp_transport_free(peer); diff --git a/net/sctp/socket.c b/net/sctp/socket.c @@ -7083,14 +7083,15 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len, } policy = params.sprstat_policy; - if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL))) + if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) || + ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK))) goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); if (!asoc) goto out; - if (policy & SCTP_PR_SCTP_ALL) { + if (policy == SCTP_PR_SCTP_ALL) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { @@ -7142,7 +7143,8 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, } policy = params.sprstat_policy; - if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL))) + if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) || + ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK))) goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig @@ -8,7 +8,6 @@ config XFRM config XFRM_OFFLOAD bool - depends on XFRM config XFRM_ALGO tristate diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c @@ -2084,19 +2084,19 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, prog->expected_attach_type = type; } -#define BPF_PROG_SEC_IMPL(string, ptype, eatype, atype) \ - { string, sizeof(string) - 1, ptype, eatype, atype } +#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \ + { string, sizeof(string) - 1, ptype, eatype, is_attachable, atype } /* Programs that can NOT be attached. */ -#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, -EINVAL) +#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0) /* Programs that can be attached. */ #define BPF_APROG_SEC(string, ptype, atype) \ - BPF_PROG_SEC_IMPL(string, ptype, 0, atype) + BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype) /* Programs that must specify expected attach type at load time. */ #define BPF_EAPROG_SEC(string, ptype, eatype) \ - BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype) + BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype) /* Programs that can be attached but attach type can't be identified by section * name. Kept for backward compatibility. @@ -2108,6 +2108,7 @@ static const struct { size_t len; enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; + int is_attachable; enum bpf_attach_type attach_type; } section_names[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), @@ -2198,7 +2199,7 @@ int libbpf_attach_type_by_name(const char *name, for (i = 0; i < ARRAY_SIZE(section_names); i++) { if (strncmp(name, section_names[i].sec, section_names[i].len)) continue; - if (section_names[i].attach_type == -EINVAL) + if (!section_names[i].is_attachable) return -EINVAL; *attach_type = section_names[i].attach_type; return 0; diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -11,6 +11,8 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include "bpf_rlimit.h" + const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; const char *cfg_map_name = "jmp_table"; bool cfg_attach = true; diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh @@ -10,7 +10,7 @@ wait_for_ip() echo -n "Wait for testing link-local IP to become available " for _i in $(seq ${MAX_PING_TRIES}); do echo -n "." - if ping -6 -q -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then + if $PING6 -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then echo " OK" return fi @@ -58,5 +58,6 @@ BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o" BPF_PROG_SECTION="cgroup_id_logger" BPF_PROG_ID=0 PROG="${DIR}/test_skb_cgroup_id_user" +type ping6 >/dev/null 2>&1 && PING6="ping6" || PING6="ping -6" main diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh @@ -4,7 +4,8 @@ set -eu ping_once() { - ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1 + type ping${1} >/dev/null 2>&1 && PING="ping${1}" || PING="ping -${1}" + $PING -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1 } wait_for_ip() diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c @@ -76,7 +76,7 @@ struct bpf_test { int fixup_percpu_cgroup_storage[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; - uint32_t retval; + uint32_t retval, retval_unpriv; enum { UNDEF, ACCEPT, @@ -3084,6 +3084,8 @@ static struct bpf_test tests[] = { .fixup_prog1 = { 2 }, .result = ACCEPT, .retval = 42, + /* Verifier rewrite for unpriv skips tail call here. */ + .retval_unpriv = 2, }, { "stack pointer arithmetic", @@ -6455,6 +6457,256 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, { + "map access: known scalar += value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: value_ptr += known scalar", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: unknown scalar += value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: value_ptr += unknown scalar", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: value_ptr += value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 pointer += pointer prohibited", + }, + { + "map access: known scalar -= value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R1 tried to subtract pointer from scalar", + }, + { + "map access: value_ptr -= known scalar", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 min value is outside of the array range", + }, + { + "map access: value_ptr -= known scalar, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: unknown scalar -= value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R1 tried to subtract pointer from scalar", + }, + { + "map access: value_ptr -= unknown scalar", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 min value is negative", + }, + { + "map access: value_ptr -= unknown scalar, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_IMM(BPF_OR, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: value_ptr -= value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + .errstr_unpriv = "R0 pointer -= pointer prohibited", + }, + { "map lookup helper access to map", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -13899,6 +14151,33 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type, } } +static int set_admin(bool admin) +{ + cap_t caps; + const cap_value_t cap_val = CAP_SYS_ADMIN; + int ret = -1; + + caps = cap_get_proc(); + if (!caps) { + perror("cap_get_proc"); + return -1; + } + if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val, + admin ? CAP_SET : CAP_CLEAR)) { + perror("cap_set_flag"); + goto out; + } + if (cap_set_proc(caps)) { + perror("cap_set_proc"); + goto out; + } + ret = 0; +out: + if (cap_free(caps)) + perror("cap_free"); + return ret; +} + static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { @@ -13907,6 +14186,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, struct bpf_insn *prog = test->insns; int map_fds[MAX_NR_MAPS]; const char *expected_err; + uint32_t expected_val; uint32_t retval; int i, err; @@ -13926,6 +14206,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, test->result_unpriv : test->result; expected_err = unpriv && test->errstr_unpriv ? test->errstr_unpriv : test->errstr; + expected_val = unpriv && test->retval_unpriv ? + test->retval_unpriv : test->retval; reject_from_alignment = fd_prog < 0 && (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && @@ -13959,16 +14241,20 @@ static void do_test_single(struct bpf_test *test, bool unpriv, __u8 tmp[TEST_DATA_LEN << 2]; __u32 size_tmp = sizeof(tmp); + if (unpriv) + set_admin(true); err = bpf_prog_test_run(fd_prog, 1, test->data, sizeof(test->data), tmp, &size_tmp, &retval, NULL); + if (unpriv) + set_admin(false); if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { printf("Unexpected bpf_prog_test_run error\n"); goto fail_log; } - if (!err && retval != test->retval && - test->retval != POINTER_VALUE) { - printf("FAIL retval %d != %d\n", retval, test->retval); + if (!err && retval != expected_val && + expected_val != POINTER_VALUE) { + printf("FAIL retval %d != %d\n", retval, expected_val); goto fail_log; } } @@ -14011,33 +14297,6 @@ static bool is_admin(void) return (sysadmin == CAP_SET); } -static int set_admin(bool admin) -{ - cap_t caps; - const cap_value_t cap_val = CAP_SYS_ADMIN; - int ret = -1; - - caps = cap_get_proc(); - if (!caps) { - perror("cap_get_proc"); - return -1; - } - if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val, - admin ? CAP_SET : CAP_CLEAR)) { - perror("cap_set_flag"); - goto out; - } - if (cap_set_proc(caps)) { - perror("cap_set_proc"); - goto out; - } - ret = 0; -out: - if (cap_free(caps)) - perror("cap_free"); - return ret; -} - static void get_unpriv_disabled() { char buf[2]; diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -25,24 +25,24 @@ # Thus we set MTU to 10K on all involved interfaces. Then both unicast and # multicast traffic uses 8K frames. # -# +-----------------------+ +----------------------------------+ -# | H1 | | H2 | -# | | | unicast --> + $h2.111 | -# | | | traffic | 192.0.2.129/28 | -# | multicast | | | e-qos-map 0:1 | -# | traffic | | | | -# | $h1 + <----- | | + $h2 | -# +-----|-----------------+ +--------------|-------------------+ -# | | -# +-----|-------------------------------------------------|-------------------+ -# | + $swp1 + $swp2 | -# | | >1Gbps | >1Gbps | -# | +---|----------------+ +----------|----------------+ | -# | | + $swp1.1 | | + $swp2.111 | | +# +---------------------------+ +----------------------------------+ +# | H1 | | H2 | +# | | | unicast --> + $h2.111 | +# | multicast | | traffic | 192.0.2.129/28 | +# | traffic | | | e-qos-map 0:1 | +# | $h1 + <----- | | | | +# | 192.0.2.65/28 | | | + $h2 | +# +---------------|-----------+ +--------------|-------------------+ +# | | +# +---------------|---------------------------------------|-------------------+ +# | $swp1 + + $swp2 | +# | >1Gbps | | >1Gbps | +# | +-------------|------+ +----------|----------------+ | +# | | $swp1.1 + | | + $swp2.111 | | # | | BR1 | SW | BR111 | | -# | | + $swp3.1 | | + $swp3.111 | | -# | +---|----------------+ +----------|----------------+ | -# | \_________________________________________________/ | +# | | $swp3.1 + | | + $swp3.111 | | +# | +-------------|------+ +----------|----------------+ | +# | \_______________________________________/ | # | | | # | + $swp3 | # | | 1Gbps bottleneck | @@ -51,6 +51,7 @@ # | # +--|-----------------+ # | + $h3 H3 | +# | | 192.0.2.66/28 | # | | | # | + $h3.111 | # | 192.0.2.130/28 | @@ -59,6 +60,7 @@ ALL_TESTS=" ping_ipv4 test_mc_aware + test_uc_aware " lib_dir=$(dirname $0)/../../../net/forwarding @@ -68,14 +70,14 @@ source $lib_dir/lib.sh h1_create() { - simple_if_init $h1 + simple_if_init $h1 192.0.2.65/28 mtu_set $h1 10000 } h1_destroy() { mtu_restore $h1 - simple_if_fini $h1 + simple_if_fini $h1 192.0.2.65/28 } h2_create() @@ -97,7 +99,7 @@ h2_destroy() h3_create() { - simple_if_init $h3 + simple_if_init $h3 192.0.2.66/28 mtu_set $h3 10000 vlan_create $h3 111 v$h3 192.0.2.130/28 @@ -108,7 +110,7 @@ h3_destroy() vlan_destroy $h3 111 mtu_restore $h3 - simple_if_fini $h3 + simple_if_fini $h3 192.0.2.66/28 } switch_create() @@ -251,7 +253,7 @@ measure_uc_rate() # average ingress rate to somewhat mitigate this. local min_ingress=2147483648 - mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ + $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ -a own -b $h3mac -t udp -q & sleep 1 @@ -291,7 +293,7 @@ test_mc_aware() check_err $? "Could not get high enough UC-only ingress rate" local ucth1=${uc_rate[1]} - mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q & + $MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q & local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) @@ -311,7 +313,7 @@ test_mc_aware() ret = 100 * ($ucth1 - $ucth2) / $ucth1 if (ret > 0) { ret } else { 0 } ") - check_err $(bc <<< "$deg > 10") + check_err $(bc <<< "$deg > 25") local interval=$((d1 - d0)) local mc_ir=$(rate $u0 $u1 $interval) @@ -335,6 +337,51 @@ test_mc_aware() echo " egress UC throughput $(humanize ${uc_rate_2[1]})" echo " ingress MC throughput $(humanize $mc_ir)" echo " egress MC throughput $(humanize $mc_er)" + echo +} + +test_uc_aware() +{ + RET=0 + + $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ + -a own -b $h3mac -t udp -q & + + local d0=$(date +%s) + local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1) + sleep 1 + + local attempts=50 + local passes=0 + local i + + for ((i = 0; i < attempts; ++i)); do + if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then + ((passes++)) + fi + + sleep 0.1 + done + + local d1=$(date +%s) + local t1=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1) + + local interval=$((d1 - d0)) + local uc_ir=$(rate $u0 $u1 $interval) + local uc_er=$(rate $t0 $t1 $interval) + + ((attempts == passes)) + check_err $? + + # Suppress noise from killing mausezahn. + { kill %% && wait; } 2>/dev/null + + log_test "MC performace under UC overload" + echo " ingress UC throughput $(humanize ${uc_ir})" + echo " egress UC throughput $(humanize ${uc_er})" + echo " sent $attempts BC ARPs, got $passes responses" } trap cleanup EXIT