whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 90ad18418c2d3db23ee827cdd74fed2ca9b70a18
parent 0778a9f2dd924c3af41971ba40eec44793aea531
Author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date:   Fri, 12 Oct 2018 09:01:59 +0200

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

David writes:
  "Networking

   1) RXRPC receive path fixes from David Howells.

   2) Re-export __skb_recv_udp(), from Jiri Kosina.

   3) Fix refcounting in u32 classificer, from Al Viro.

   4) Userspace netlink ABI fixes from Eugene Syromiatnikov.

   5) Don't double iounmap on rmmod in ena driver, from Arthur
      Kiyanovski.

   6) Fix devlink string attribute handling, we must pull a copy into a
      kernel buffer if the lifetime extends past the netlink request.
      From Moshe Shemesh.

   7) Fix hangs in RDS, from Ka-Cheong Poon.

   8) Fix recursive locking lockdep warnings in tipc, from Ying Xue.

   9) Clear RX irq correctly in socionext, from Ilias Apalodimas.

   10) bcm_sf2 fixes from Florian Fainelli."

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (38 commits)
  net: dsa: bcm_sf2: Call setup during switch resume
  net: dsa: bcm_sf2: Fix unbind ordering
  net: phy: sfp: remove sfp_mutex's definition
  r8169: set RX_MULTI_EN bit in RxConfig for 8168F-family chips
  net: socionext: clear rx irq correctly
  net/mlx4_core: Fix warnings during boot on driverinit param set failures
  tipc: eliminate possible recursive locking detected by LOCKDEP
  selftests: udpgso_bench.sh explicitly requires bash
  selftests: rtnetlink.sh explicitly requires bash.
  qmi_wwan: Added support for Gemalto's Cinterion ALASxx WWAN interface
  tipc: queue socket protocol error messages into socket receive buffer
  tipc: set link tolerance correctly in broadcast link
  net: ipv4: don't let PMTU updates increase route MTU
  net: ipv4: update fnhe_pmtu when first hop's MTU changes
  net/ipv6: stop leaking percpu memory in fib6 info
  rds: RDS (tcp) hangs on sendto() to unresponding address
  net: make skb_partial_csum_set() more robust against overflows
  devlink: Add helper function for safely copy string param
  devlink: Fix param cmode driverinit for string type
  devlink: Fix param set handling for string type
  ...

Diffstat:
Mdrivers/net/dsa/bcm_sf2.c | 14+++-----------
Mdrivers/net/ethernet/amazon/ena/ena_eth_com.c | 8++++----
Mdrivers/net/ethernet/amazon/ena/ena_netdev.c | 22++++++++++++----------
Mdrivers/net/ethernet/mellanox/mlx4/main.c | 43+++++++++++++++----------------------------
Mdrivers/net/ethernet/realtek/r8169.c | 4++--
Mdrivers/net/ethernet/socionext/netsec.c | 5++++-
Mdrivers/net/phy/sfp.c | 2--
Mdrivers/net/usb/qmi_wwan.c | 1+
Minclude/linux/netdevice.h | 7+++++++
Minclude/net/devlink.h | 12++++++++++--
Minclude/net/ip_fib.h | 1+
Minclude/trace/events/rxrpc.h | 1+
Minclude/uapi/linux/smc_diag.h | 25++++++++++++++-----------
Minclude/uapi/linux/udp.h | 1+
Mnet/core/dev.c | 28++++++++++++++++++++++++++--
Mnet/core/devlink.c | 43+++++++++++++++++++++++++++++++++++++------
Mnet/core/skbuff.c | 12+++++++-----
Mnet/ipv4/fib_frontend.c | 12++++++++----
Mnet/ipv4/fib_semantics.c | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Mnet/ipv4/route.c | 7++++---
Mnet/ipv4/udp.c | 2+-
Mnet/ipv6/ip6_fib.c | 2++
Mnet/rds/send.c | 13++++++++++---
Mnet/rxrpc/ar-internal.h | 23+++++++++++++----------
Mnet/rxrpc/call_accept.c | 27++++++++++++++++++---------
Mnet/rxrpc/call_object.c | 5+++--
Mnet/rxrpc/conn_client.c | 10++++++----
Mnet/rxrpc/conn_event.c | 26+++++++++++++++-----------
Mnet/rxrpc/input.c | 253+++++++++++++++++++++++++++++++++++++++++--------------------------------------
Mnet/rxrpc/local_object.c | 30++++++++++++++++++++++++------
Mnet/rxrpc/peer_event.c | 5+++++
Mnet/rxrpc/peer_object.c | 29++++++++++++++++++-----------
Mnet/sched/cls_u32.c | 10+++++-----
Mnet/tipc/link.c | 27++++++++++++++++++++-------
Mnet/tipc/socket.c | 14++++++++++++--
Mtools/testing/selftests/net/rtnetlink.sh | 2+-
Mtools/testing/selftests/net/udpgso_bench.sh | 2+-
37 files changed, 493 insertions(+), 285 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c @@ -703,7 +703,6 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds) static int bcm_sf2_sw_resume(struct dsa_switch *ds) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - unsigned int port; int ret; ret = bcm_sf2_sw_rst(priv); @@ -715,14 +714,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) if (priv->hw_params.num_gphy == 1) bcm_sf2_gphy_enable_set(ds, true); - for (port = 0; port < DSA_MAX_PORTS; port++) { - if (dsa_is_user_port(ds, port)) - bcm_sf2_port_setup(ds, port, NULL); - else if (dsa_is_cpu_port(ds, port)) - bcm_sf2_imp_setup(ds, port); - } - - bcm_sf2_enable_acb(ds); + ds->ops->setup(ds); return 0; } @@ -1173,10 +1165,10 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) { struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); - /* Disable all ports and interrupts */ priv->wol_ports_mask = 0; - bcm_sf2_sw_suspend(priv->dev->ds); dsa_unregister_switch(priv->dev->ds); + /* Disable all ports and interrupts */ + bcm_sf2_sw_suspend(priv->dev->ds); bcm_sf2_mdio_unregister(priv); return 0; diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -245,11 +245,11 @@ static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >> ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT; ena_rx_ctx->l3_csum_err = - (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >> - ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT; + !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT); ena_rx_ctx->l4_csum_err = - (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> - ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT; + !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT); ena_rx_ctx->hash = cdesc->hash; ena_rx_ctx->frag = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1575,8 +1575,6 @@ static int ena_up_complete(struct ena_adapter *adapter) if (rc) return rc; - ena_init_napi(adapter); - ena_change_mtu(adapter->netdev, adapter->netdev->mtu); ena_refill_all_rx_bufs(adapter); @@ -1730,6 +1728,13 @@ static int ena_up(struct ena_adapter *adapter) ena_setup_io_intr(adapter); + /* napi poll functions should be initialized before running + * request_irq(), to handle a rare condition where there is a pending + * interrupt, causing the ISR to fire immediately while the poll + * function wasn't set yet, causing a null dereference + */ + ena_init_napi(adapter); + rc = ena_request_io_irq(adapter); if (rc) goto err_req_irq; @@ -2619,7 +2624,11 @@ err_disable_msix: ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); err_device_destroy: + ena_com_abort_admin_commands(ena_dev); + ena_com_wait_for_abort_completion(ena_dev); ena_com_admin_destroy(ena_dev); + ena_com_mmio_reg_read_request_destroy(ena_dev); + ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE); err: clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); @@ -3099,15 +3108,8 @@ err_rss_init: static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { - int release_bars; - - if (ena_dev->mem_bar) - devm_iounmap(&pdev->dev, ena_dev->mem_bar); - - if (ena_dev->reg_bar) - devm_iounmap(&pdev->dev, ena_dev->reg_bar); + int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; - release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; pci_release_selected_regions(pdev, release_bars); } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -260,47 +260,34 @@ static const struct devlink_param mlx4_devlink_params[] = { NULL, NULL, NULL), }; -static void mlx4_devlink_set_init_value(struct devlink *devlink, u32 param_id, - union devlink_param_value init_val) -{ - struct mlx4_priv *priv = devlink_priv(devlink); - struct mlx4_dev *dev = &priv->dev; - int err; - - err = devlink_param_driverinit_value_set(devlink, param_id, init_val); - if (err) - mlx4_warn(dev, - "devlink set parameter %u value failed (err = %d)", - param_id, err); -} - static void mlx4_devlink_set_params_init_values(struct devlink *devlink) { union devlink_param_value value; value.vbool = !!mlx4_internal_err_reset; - mlx4_devlink_set_init_value(devlink, - DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, - value); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, + value); value.vu32 = 1UL << log_num_mac; - mlx4_devlink_set_init_value(devlink, - DEVLINK_PARAM_GENERIC_ID_MAX_MACS, value); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); value.vbool = enable_64b_cqe_eqe; - mlx4_devlink_set_init_value(devlink, - MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE, - value); + devlink_param_driverinit_value_set(devlink, + MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE, + value); value.vbool = enable_4k_uar; - mlx4_devlink_set_init_value(devlink, - MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR, - value); + devlink_param_driverinit_value_set(devlink, + MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR, + value); value.vbool = false; - mlx4_devlink_set_init_value(devlink, - DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, - value); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, + value); } static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev, diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c @@ -4282,8 +4282,8 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST); break; case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24: - case RTL_GIGA_MAC_VER_34: - case RTL_GIGA_MAC_VER_35: + case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_36: + case RTL_GIGA_MAC_VER_38: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); break; case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c @@ -735,8 +735,11 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) u16 idx = dring->tail; struct netsec_de *de = dring->vaddr + (DESC_SZ * idx); - if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) + if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) { + /* reading the register clears the irq */ + netsec_read(priv, NETSEC_REG_NRM_RX_PKTCNT); break; + } /* This barrier is needed to keep us from reading * any other fields out of the netsec_de until we have diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c @@ -163,8 +163,6 @@ static const enum gpiod_flags gpio_flags[] = { /* Give this long for the PHY to reset. */ #define T_PHY_RESET_MS 50 -static DEFINE_MUTEX(sfp_mutex); - struct sff_data { unsigned int gpios; bool (*module_supported)(const struct sfp_eeprom_id *id); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c @@ -1241,6 +1241,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */ {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */ + {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h @@ -2458,6 +2458,13 @@ struct netdev_notifier_info { struct netlink_ext_ack *extack; }; +struct netdev_notifier_info_ext { + struct netdev_notifier_info info; /* must be first */ + union { + u32 mtu; + } ext; +}; + struct netdev_notifier_change_info { struct netdev_notifier_info info; /* must be first */ unsigned int flags_changed; diff --git a/include/net/devlink.h b/include/net/devlink.h @@ -298,7 +298,7 @@ struct devlink_resource { #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 -#define DEVLINK_PARAM_MAX_STRING_VALUE 32 +#define __DEVLINK_PARAM_MAX_STRING_VALUE 32 enum devlink_param_type { DEVLINK_PARAM_TYPE_U8, DEVLINK_PARAM_TYPE_U16, @@ -311,7 +311,7 @@ union devlink_param_value { u8 vu8; u16 vu16; u32 vu32; - const char *vstr; + char vstr[__DEVLINK_PARAM_MAX_STRING_VALUE]; bool vbool; }; @@ -553,6 +553,8 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val); void devlink_param_value_changed(struct devlink *devlink, u32 param_id); +void devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src); struct devlink_region *devlink_region_create(struct devlink *devlink, const char *region_name, u32 region_max_snapshots, @@ -789,6 +791,12 @@ devlink_param_value_changed(struct devlink *devlink, u32 param_id) { } +static inline void +devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src) +{ +} + static inline struct devlink_region * devlink_region_create(struct devlink *devlink, const char *region_name, diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h @@ -394,6 +394,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h @@ -931,6 +931,7 @@ TRACE_EVENT(rxrpc_tx_packet, TP_fast_assign( __entry->call = call_id; memcpy(&__entry->whdr, whdr, sizeof(__entry->whdr)); + __entry->where = where; ), TP_printk("c=%08x %08x:%08x:%08x:%04x %08x %08x %02x %02x %s %s", diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h @@ -18,14 +18,17 @@ struct smc_diag_req { * on the internal clcsock, and more SMC-related socket data */ struct smc_diag_msg { - __u8 diag_family; - __u8 diag_state; - __u8 diag_mode; - __u8 diag_shutdown; + __u8 diag_family; + __u8 diag_state; + union { + __u8 diag_mode; + __u8 diag_fallback; /* the old name of the field */ + }; + __u8 diag_shutdown; struct inet_diag_sockid id; - __u32 diag_uid; - __u64 diag_inode; + __u32 diag_uid; + __aligned_u64 diag_inode; }; /* Mode of a connection */ @@ -99,11 +102,11 @@ struct smc_diag_fallback { }; struct smcd_diag_dmbinfo { /* SMC-D Socket internals */ - __u32 linkid; /* Link identifier */ - __u64 peer_gid; /* Peer GID */ - __u64 my_gid; /* My GID */ - __u64 token; /* Token of DMB */ - __u64 peer_token; /* Token of remote DMBE */ + __u32 linkid; /* Link identifier */ + __aligned_u64 peer_gid; /* Peer GID */ + __aligned_u64 my_gid; /* My GID */ + __aligned_u64 token; /* Token of DMB */ + __aligned_u64 peer_token; /* Token of remote DMBE */ }; #endif /* _UAPI_SMC_DIAG_H_ */ diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h @@ -40,5 +40,6 @@ struct udphdr { #define UDP_ENCAP_L2TPINUDP 3 /* rfc2661 */ #define UDP_ENCAP_GTP0 4 /* GSM TS 09.60 */ #define UDP_ENCAP_GTP1U 5 /* 3GPP TS 29.060 */ +#define UDP_ENCAP_RXRPC 6 #endif /* _UAPI_LINUX_UDP_H */ diff --git a/net/core/dev.c b/net/core/dev.c @@ -1752,6 +1752,28 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); +/** + * call_netdevice_notifiers_mtu - call all network notifier blocks + * @val: value passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function + * @arg: additional u32 argument passed to the notifier function + * + * Call all network notifier blocks. Parameters and return value + * are as for raw_notifier_call_chain(). + */ +static int call_netdevice_notifiers_mtu(unsigned long val, + struct net_device *dev, u32 arg) +{ + struct netdev_notifier_info_ext info = { + .info.dev = dev, + .ext.mtu = arg, + }; + + BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0); + + return call_netdevice_notifiers_info(val, &info.info); +} + #ifdef CONFIG_NET_INGRESS static DEFINE_STATIC_KEY_FALSE(ingress_needed_key); @@ -7574,14 +7596,16 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu, err = __dev_set_mtu(dev, new_mtu); if (!err) { - err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + orig_mtu); err = notifier_to_errno(err); if (err) { /* setting mtu back and notifying everyone again, * so that they have a chance to revert changes. */ __dev_set_mtu(dev, orig_mtu); - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + new_mtu); } } return err; diff --git a/net/core/devlink.c b/net/core/devlink.c @@ -2995,6 +2995,8 @@ devlink_param_value_get_from_info(const struct devlink_param *param, struct genl_info *info, union devlink_param_value *value) { + int len; + if (param->type != DEVLINK_PARAM_TYPE_BOOL && !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) return -EINVAL; @@ -3010,10 +3012,13 @@ devlink_param_value_get_from_info(const struct devlink_param *param, value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); break; case DEVLINK_PARAM_TYPE_STRING: - if (nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) > - DEVLINK_PARAM_MAX_STRING_VALUE) + len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]), + nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); + if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) || + len >= __DEVLINK_PARAM_MAX_STRING_VALUE) return -EINVAL; - value->vstr = nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + strcpy(value->vstr, + nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); break; case DEVLINK_PARAM_TYPE_BOOL: value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ? @@ -3100,7 +3105,10 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, return -EOPNOTSUPP; if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) { - param_item->driverinit_value = value; + if (param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(param_item->driverinit_value.vstr, value.vstr); + else + param_item->driverinit_value = value; param_item->driverinit_value_valid = true; } else { if (!param->set) @@ -4540,7 +4548,10 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, DEVLINK_PARAM_CMODE_DRIVERINIT)) return -EOPNOTSUPP; - *init_val = param_item->driverinit_value; + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(init_val->vstr, param_item->driverinit_value.vstr); + else + *init_val = param_item->driverinit_value; return 0; } @@ -4571,7 +4582,10 @@ int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, DEVLINK_PARAM_CMODE_DRIVERINIT)) return -EOPNOTSUPP; - param_item->driverinit_value = init_val; + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(param_item->driverinit_value.vstr, init_val.vstr); + else + param_item->driverinit_value = init_val; param_item->driverinit_value_valid = true; devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW); @@ -4604,6 +4618,23 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id) EXPORT_SYMBOL_GPL(devlink_param_value_changed); /** + * devlink_param_value_str_fill - Safely fill-up the string preventing + * from overflow of the preallocated buffer + * + * @dst_val: destination devlink_param_value + * @src: source buffer + */ +void devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src) +{ + size_t len; + + len = strlcpy(dst_val->vstr, src, __DEVLINK_PARAM_MAX_STRING_VALUE); + WARN_ON(len >= __DEVLINK_PARAM_MAX_STRING_VALUE); +} +EXPORT_SYMBOL_GPL(devlink_param_value_str_fill); + +/** * devlink_region_create - create a new address region * * @devlink: devlink diff --git a/net/core/skbuff.c b/net/core/skbuff.c @@ -4452,14 +4452,16 @@ EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); */ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) { - if (unlikely(start > skb_headlen(skb)) || - unlikely((int)start + off > skb_headlen(skb) - 2)) { - net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n", - start, off, skb_headlen(skb)); + u32 csum_end = (u32)start + (u32)off + sizeof(__sum16); + u32 csum_start = skb_headroom(skb) + (u32)start; + + if (unlikely(csum_start > U16_MAX || csum_end > skb_headlen(skb))) { + net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n", + start, off, skb_headroom(skb), skb_headlen(skb)); return false; } skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + start; + skb->csum_start = csum_start; skb->csum_offset = off; skb_set_transport_header(skb, start); return true; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c @@ -1243,7 +1243,8 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *upper_info = ptr; + struct netdev_notifier_info_ext *info_ext = ptr; struct in_device *in_dev; struct net *net = dev_net(dev); unsigned int flags; @@ -1278,16 +1279,19 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev, RTNH_F_LINKDOWN); else fib_sync_down_dev(dev, event, false); - /* fall through */ + rt_cache_flush(net); + break; case NETDEV_CHANGEMTU: + fib_sync_mtu(dev, info_ext->ext.mtu); rt_cache_flush(net); break; case NETDEV_CHANGEUPPER: - info = ptr; + upper_info = ptr; /* flush all routes if dev is linked to or unlinked from * an L3 master device (e.g., VRF) */ - if (info->upper_dev && netif_is_l3_master(info->upper_dev)) + if (upper_info->upper_dev && + netif_is_l3_master(upper_info->upper_dev)) fib_disable_ip(dev, NETDEV_DOWN, true); break; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c @@ -1470,6 +1470,56 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh, return NOTIFY_DONE; } +/* Update the PMTU of exceptions when: + * - the new MTU of the first hop becomes smaller than the PMTU + * - the old MTU was the same as the PMTU, and it limited discovery of + * larger MTUs on the path. With that limit raised, we can now + * discover larger MTUs + * A special case is locked exceptions, for which the PMTU is smaller + * than the minimal accepted PMTU: + * - if the new MTU is greater than the PMTU, don't make any change + * - otherwise, unlock and set PMTU + */ +static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) +{ + struct fnhe_hash_bucket *bucket; + int i; + + bucket = rcu_dereference_protected(nh->nh_exceptions, 1); + if (!bucket) + return; + + for (i = 0; i < FNHE_HASH_SIZE; i++) { + struct fib_nh_exception *fnhe; + + for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); + fnhe; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { + if (fnhe->fnhe_mtu_locked) { + if (new <= fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + fnhe->fnhe_mtu_locked = false; + } + } else if (new < fnhe->fnhe_pmtu || + orig == fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + } + } + } +} + +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) +{ + unsigned int hash = fib_devindex_hashfn(dev->ifindex); + struct hlist_head *head = &fib_info_devhash[hash]; + struct fib_nh *nh; + + hlist_for_each_entry(nh, head, nh_hash) { + if (nh->nh_dev == dev) + nh_update_mtu(nh, dev->mtu, orig_mtu); + } +} + /* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host diff --git a/net/ipv4/route.c b/net/ipv4/route.c @@ -1001,21 +1001,22 @@ out: kfree_skb(skb); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; + u32 old_mtu = ipv4_mtu(dst); struct fib_result res; bool lock = false; if (ip_mtu_locked(dst)) return; - if (ipv4_mtu(dst) < mtu) + if (old_mtu < mtu) return; if (mtu < ip_rt_min_pmtu) { lock = true; - mtu = ip_rt_min_pmtu; + mtu = min(old_mtu, ip_rt_min_pmtu); } - if (rt->rt_pmtu == mtu && + if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) return; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c @@ -1627,7 +1627,7 @@ busy_check: *err = error; return NULL; } -EXPORT_SYMBOL_GPL(__skb_recv_udp); +EXPORT_SYMBOL(__skb_recv_udp); /* * This should be easy, if there is something there we diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c @@ -196,6 +196,8 @@ void fib6_info_destroy_rcu(struct rcu_head *head) *ppcpu_rt = NULL; } } + + free_percpu(f6i->rt6i_pcpu); } lwtstate_put(f6i->fib6_nh.nh_lwtstate); diff --git a/net/rds/send.c b/net/rds/send.c @@ -1007,7 +1007,8 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, return ret; } -static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn) +static int rds_send_mprds_hash(struct rds_sock *rs, + struct rds_connection *conn, int nonblock) { int hash; @@ -1023,10 +1024,16 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn) * used. But if we are interrupted, we have to use the zero * c_path in case the connection ends up being non-MP capable. */ - if (conn->c_npaths == 0) + if (conn->c_npaths == 0) { + /* Cannot wait for the connection be made, so just use + * the base c_path. + */ + if (nonblock) + return 0; if (wait_event_interruptible(conn->c_hs_waitq, conn->c_npaths != 0)) hash = 0; + } if (conn->c_npaths == 1) hash = 0; } @@ -1256,7 +1263,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) } if (conn->c_trans->t_mp_capable) - cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)]; + cpath = &conn->c_path[rds_send_mprds_hash(rs, conn, nonblock)]; else cpath = &conn->c_path[0]; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h @@ -302,6 +302,7 @@ struct rxrpc_peer { /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 + spinlock_t rtt_input_lock; /* RTT lock for input routine */ ktime_t rtt_last_req; /* Time of last RTT request */ u64 rtt; /* Current RTT estimate (in nS) */ u64 rtt_sum; /* Sum of cache contents */ @@ -442,17 +443,17 @@ struct rxrpc_connection { spinlock_t state_lock; /* state-change lock */ enum rxrpc_conn_cache_state cache_state; enum rxrpc_conn_proto_state state; /* current state of connection */ - u32 local_abort; /* local abort code */ - u32 remote_abort; /* remote abort code */ + u32 abort_code; /* Abort code of connection abort */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ u32 security_nonce; /* response re-use preventer */ - u16 service_id; /* Service ID, possibly upgraded */ + u32 service_id; /* Service ID, possibly upgraded */ u8 size_align; /* data size alignment (for security) */ u8 security_size; /* security header size */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ + short error; /* Local error code */ }; static inline bool rxrpc_to_server(const struct rxrpc_skb_priv *sp) @@ -635,6 +636,8 @@ struct rxrpc_call { bool tx_phase; /* T if transmission phase, F if receive phase */ u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ + spinlock_t input_lock; /* Lock for packet input to this call */ + /* receive-phase ACK management */ u8 ackr_reason; /* reason to ACK */ u16 ackr_skew; /* skew on packet being ACK'd */ @@ -720,8 +723,6 @@ int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_sock *, - struct rxrpc_peer *, - struct rxrpc_connection *, struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, @@ -891,8 +892,9 @@ extern unsigned long rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); -int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *, - struct sockaddr_rxrpc *, gfp_t); +int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *, + struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, + gfp_t); void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_call *); void rxrpc_put_client_conn(struct rxrpc_connection *); @@ -965,7 +967,7 @@ void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* * input.c */ -void rxrpc_data_ready(struct sock *); +int rxrpc_input_packet(struct sock *, struct sk_buff *); /* * insecure.c @@ -1045,10 +1047,11 @@ void rxrpc_peer_keepalive_worker(struct work_struct *); */ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, const struct sockaddr_rxrpc *); -struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); -void rxrpc_new_incoming_peer(struct rxrpc_local *, struct rxrpc_peer *); +void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *, + struct rxrpc_peer *); void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c @@ -287,7 +287,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); - rxrpc_new_incoming_peer(local, peer); + rxrpc_new_incoming_peer(rx, local, peer); } /* Now allocate and set up the connection */ @@ -333,11 +333,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, */ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_sock *rx, - struct rxrpc_peer *peer, - struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_connection *conn; + struct rxrpc_peer *peer; struct rxrpc_call *call; _enter(""); @@ -354,6 +354,13 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, goto out; } + /* The peer, connection and call may all have sprung into existence due + * to a duplicate packet being handled on another CPU in parallel, so + * we have to recheck the routing. However, we're now holding + * rx->incoming_lock, so the values should remain stable. + */ + conn = rxrpc_find_connection_rcu(local, skb, &peer); + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, skb); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; @@ -396,20 +403,22 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, case RXRPC_CONN_SERVICE: write_lock(&call->state_lock); - if (rx->discard_new_call) - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - else - call->state = RXRPC_CALL_SERVER_ACCEPTING; + if (call->state < RXRPC_CALL_COMPLETE) { + if (rx->discard_new_call) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + else + call->state = RXRPC_CALL_SERVER_ACCEPTING; + } write_unlock(&call->state_lock); break; case RXRPC_CONN_REMOTELY_ABORTED: rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->remote_abort, -ECONNABORTED); + conn->abort_code, conn->error); break; case RXRPC_CONN_LOCALLY_ABORTED: rxrpc_abort_call("CON", call, sp->hdr.seq, - conn->local_abort, -ECONNABORTED); + conn->abort_code, conn->error); break; default: BUG(); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c @@ -138,6 +138,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); spin_lock_init(&call->notify_lock); + spin_lock_init(&call->input_lock); rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); call->debug_id = debug_id; @@ -287,7 +288,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. */ - ret = rxrpc_connect_call(call, cp, srx, gfp); + ret = rxrpc_connect_call(rx, call, cp, srx, gfp); if (ret < 0) goto error; @@ -339,7 +340,7 @@ int rxrpc_retry_client_call(struct rxrpc_sock *rx, /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. */ - ret = rxrpc_connect_call(call, cp, srx, gfp); + ret = rxrpc_connect_call(rx, call, cp, srx, gfp); if (ret < 0) goto error; diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c @@ -276,7 +276,8 @@ dont_reuse: * If we return with a connection, the call will be on its waiting list. It's * left to the caller to assign a channel and wake up the call. */ -static int rxrpc_get_client_conn(struct rxrpc_call *call, +static int rxrpc_get_client_conn(struct rxrpc_sock *rx, + struct rxrpc_call *call, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, gfp_t gfp) @@ -289,7 +290,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp); + cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp); if (!cp->peer) goto error; @@ -683,7 +684,8 @@ out: * find a connection for a call * - called in process context with IRQs enabled */ -int rxrpc_connect_call(struct rxrpc_call *call, +int rxrpc_connect_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, gfp_t gfp) @@ -696,7 +698,7 @@ int rxrpc_connect_call(struct rxrpc_call *call, rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); rxrpc_cull_active_client_conns(rxnet); - ret = rxrpc_get_client_conn(call, cp, srx, gfp); + ret = rxrpc_get_client_conn(rx, call, cp, srx, gfp); if (ret < 0) goto out; diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c @@ -126,7 +126,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, switch (chan->last_type) { case RXRPC_PACKET_TYPE_ABORT: - _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort); + _proto("Tx ABORT %%%u { %d } [re]", serial, conn->abort_code); break; case RXRPC_PACKET_TYPE_ACK: trace_rxrpc_tx_ack(chan->call_debug_id, serial, @@ -153,13 +153,12 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, * pass a connection-level abort onto all calls on that connection */ static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl, - u32 abort_code, int error) + enum rxrpc_call_completion compl) { struct rxrpc_call *call; int i; - _enter("{%d},%x", conn->debug_id, abort_code); + _enter("{%d},%x", conn->debug_id, conn->abort_code); spin_lock(&conn->channel_lock); @@ -172,9 +171,11 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, trace_rxrpc_abort(call->debug_id, "CON", call->cid, call->call_id, 0, - abort_code, error); + conn->abort_code, + conn->error); if (rxrpc_set_call_completion(call, compl, - abort_code, error)) + conn->abort_code, + conn->error)) rxrpc_notify_socket(call); } } @@ -207,10 +208,12 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, return 0; } + conn->error = error; + conn->abort_code = abort_code; conn->state = RXRPC_CONN_LOCALLY_ABORTED; spin_unlock_bh(&conn->state_lock); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error); + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED); msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; @@ -229,7 +232,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, whdr._rsvd = 0; whdr.serviceId = htons(conn->service_id); - word = htonl(conn->local_abort); + word = htonl(conn->abort_code); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); @@ -240,7 +243,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, serial = atomic_inc_return(&conn->serial); whdr.serial = htonl(serial); - _proto("Tx CONN ABORT %%%u { %d }", serial, conn->local_abort); + _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ret < 0) { @@ -315,9 +318,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); + conn->error = -ECONNABORTED; + conn->abort_code = abort_code; conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, -ECONNABORTED); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c @@ -216,10 +216,11 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, /* * Apply a hard ACK by advancing the Tx window. */ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, +static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, struct rxrpc_ack_summary *summary) { struct sk_buff *skb, *list = NULL; + bool rot_last = false; int ix; u8 annotation; @@ -243,15 +244,17 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb->next = list; list = skb; - if (annotation & RXRPC_TX_ANNO_LAST) + if (annotation & RXRPC_TX_ANNO_LAST) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); + rot_last = true; + } if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) summary->nr_rot_new_acks++; } spin_unlock(&call->lock); - trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ? + trace_rxrpc_transmit(call, (rot_last ? rxrpc_transmit_rotate_last : rxrpc_transmit_rotate)); wake_up(&call->waitq); @@ -262,6 +265,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb->next = NULL; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); } + + return rot_last; } /* @@ -273,23 +278,26 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, const char *abort_why) { + unsigned int state; ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); write_lock(&call->state_lock); - switch (call->state) { + state = call->state; + switch (state) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: if (reply_begun) - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY; else - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY; break; case RXRPC_CALL_SERVER_AWAIT_ACK: __rxrpc_call_completed(call); rxrpc_notify_socket(call); + state = call->state; break; default: @@ -297,11 +305,10 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, } write_unlock(&call->state_lock); - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY) trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); - } else { + else trace_rxrpc_transmit(call, rxrpc_transmit_end); - } _leave(" = ok"); return true; @@ -332,11 +339,11 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } - if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) - rxrpc_rotate_tx_window(call, top, &summary); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_proto_abort("TXL", call, top); - return false; + if (!rxrpc_rotate_tx_window(call, top, &summary)) { + rxrpc_proto_abort("TXL", call, top); + return false; + } } if (!rxrpc_end_tx_phase(call, true, "ETD")) return false; @@ -452,13 +459,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, } } + spin_lock(&call->input_lock); + /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. */ if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && !rxrpc_receiving_reply(call)) - return; + goto unlock; call->ackr_prev_seq = seq; @@ -488,12 +497,16 @@ next_subpacket: if (flags & RXRPC_LAST_PACKET) { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - seq != call->rx_top) - return rxrpc_proto_abort("LSN", call, seq); + seq != call->rx_top) { + rxrpc_proto_abort("LSN", call, seq); + goto unlock; + } } else { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - after_eq(seq, call->rx_top)) - return rxrpc_proto_abort("LSA", call, seq); + after_eq(seq, call->rx_top)) { + rxrpc_proto_abort("LSA", call, seq); + goto unlock; + } } trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation); @@ -560,8 +573,10 @@ next_subpacket: skip: offset += len; if (flags & RXRPC_JUMBO_PACKET) { - if (skb_copy_bits(skb, offset, &flags, 1) < 0) - return rxrpc_proto_abort("XJF", call, seq); + if (skb_copy_bits(skb, offset, &flags, 1) < 0) { + rxrpc_proto_abort("XJF", call, seq); + goto unlock; + } offset += sizeof(struct rxrpc_jumbo_header); seq++; serial++; @@ -601,6 +616,9 @@ ack: trace_rxrpc_notify_socket(call->debug_id, serial); rxrpc_notify_socket(call); } + +unlock: + spin_unlock(&call->input_lock); _leave(" [queued]"); } @@ -687,15 +705,14 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call, ping_time = call->ping_time; smp_rmb(); - ping_serial = call->ping_serial; + ping_serial = READ_ONCE(call->ping_serial); if (orig_serial == call->acks_lost_ping) rxrpc_input_check_for_lost_ack(call); - if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || - before(orig_serial, ping_serial)) + if (before(orig_serial, ping_serial) || + !test_and_clear_bit(RXRPC_CALL_PINGING, &call->flags)) return; - clear_bit(RXRPC_CALL_PINGING, &call->flags); if (after(orig_serial, ping_serial)) return; @@ -861,15 +878,32 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) + return; + + buf.info.rxMTU = 0; ioffset = offset + nr_acks + 3; - if (skb->len >= ioffset + sizeof(buf.info)) { - if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) - return rxrpc_proto_abort("XAI", call, 0); + if (skb->len >= ioffset + sizeof(buf.info) && + skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) + return rxrpc_proto_abort("XAI", call, 0); + + spin_lock(&call->input_lock); + + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) + goto out; + call->acks_latest_ts = skb->tstamp; + call->acks_latest = sp->hdr.serial; + + /* Parse rwind and mtu sizes if provided. */ + if (buf.info.rxMTU) rxrpc_input_ackinfo(call, skb, &buf.info); - } - if (first_soft_ack == 0) - return rxrpc_proto_abort("AK0", call, 0); + if (first_soft_ack == 0) { + rxrpc_proto_abort("AK0", call, 0); + goto out; + } /* Ignore ACKs unless we are or have just been transmitting. */ switch (READ_ONCE(call->state)) { @@ -879,39 +913,35 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, case RXRPC_CALL_SERVER_AWAIT_ACK: break; default: - return; - } - - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) { - _debug("discard ACK %d <= %d", - sp->hdr.serial, call->acks_latest); - return; + goto out; } - call->acks_latest_ts = skb->tstamp; - call->acks_latest = sp->hdr.serial; if (before(hard_ack, call->tx_hard_ack) || - after(hard_ack, call->tx_top)) - return rxrpc_proto_abort("AKW", call, 0); - if (nr_acks > call->tx_top - hard_ack) - return rxrpc_proto_abort("AKN", call, 0); + after(hard_ack, call->tx_top)) { + rxrpc_proto_abort("AKW", call, 0); + goto out; + } + if (nr_acks > call->tx_top - hard_ack) { + rxrpc_proto_abort("AKN", call, 0); + goto out; + } - if (after(hard_ack, call->tx_hard_ack)) - rxrpc_rotate_tx_window(call, hard_ack, &summary); + if (after(hard_ack, call->tx_hard_ack)) { + if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { + rxrpc_end_tx_phase(call, false, "ETA"); + goto out; + } + } if (nr_acks > 0) { - if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) - return rxrpc_proto_abort("XSA", call, 0); + if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) { + rxrpc_proto_abort("XSA", call, 0); + goto out; + } rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, &summary); } - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_end_tx_phase(call, false, "ETA"); - return; - } - if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & RXRPC_TX_ANNO_LAST && summary.nr_acks == call->tx_top - hard_ack && @@ -920,7 +950,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, false, true, rxrpc_propose_ack_ping_for_lost_reply); - return rxrpc_congestion_management(call, skb, &summary, acked_serial); + rxrpc_congestion_management(call, skb, &summary, acked_serial); +out: + spin_unlock(&call->input_lock); } /* @@ -933,9 +965,12 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_rotate_tx_window(call, call->tx_top, &summary); - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + spin_lock(&call->input_lock); + + if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) rxrpc_end_tx_phase(call, false, "ETL"); + + spin_unlock(&call->input_lock); } /* @@ -1018,18 +1053,19 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, } /* - * Handle a new call on a channel implicitly completing the preceding call on - * that channel. + * Handle a new service call on a channel implicitly completing the preceding + * call on that channel. This does not apply to client conns. * * TODO: If callNumber > call_id + 1, renegotiate security. */ -static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, +static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, + struct rxrpc_connection *conn, struct rxrpc_call *call) { switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); - break; + /* Fall through */ case RXRPC_CALL_COMPLETE: break; default: @@ -1037,11 +1073,13 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } + trace_rxrpc_improper_term(call); break; } - trace_rxrpc_improper_term(call); + spin_lock(&rx->incoming_lock); __rxrpc_disconnect_call(conn, call); + spin_unlock(&rx->incoming_lock); rxrpc_notify_socket(call); } @@ -1120,8 +1158,10 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * The socket is locked by the caller and this prevents the socket from being * shut down and the local endpoint from going away, thus sk_user_data will not * be cleared until this function returns. + * + * Called with the RCU read lock held from the IP layer via UDP. */ -void rxrpc_data_ready(struct sock *udp_sk) +int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) { struct rxrpc_connection *conn; struct rxrpc_channel *chan; @@ -1130,38 +1170,17 @@ void rxrpc_data_ready(struct sock *udp_sk) struct rxrpc_local *local = udp_sk->sk_user_data; struct rxrpc_peer *peer = NULL; struct rxrpc_sock *rx = NULL; - struct sk_buff *skb; unsigned int channel; - int ret, skew = 0; + int skew = 0; _enter("%p", udp_sk); - ASSERT(!irqs_disabled()); - - skb = skb_recv_udp(udp_sk, 0, 1, &ret); - if (!skb) { - if (ret == -EAGAIN) - return; - _debug("UDP socket error %d", ret); - return; - } - if (skb->tstamp == 0) skb->tstamp = ktime_get_real(); rxrpc_new_skb(skb, rxrpc_skb_rx_received); - _net("recv skb %p", skb); - - /* we'll probably need to checksum it (didn't call sock_recvmsg) */ - if (skb_checksum_complete(skb)) { - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0); - _leave(" [CSUM failed]"); - return; - } - - __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); + skb_pull(skb, sizeof(struct udphdr)); /* The UDP protocol already released all skb resources; * we are free to add our own data there. @@ -1176,11 +1195,13 @@ void rxrpc_data_ready(struct sock *udp_sk) static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); - rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); - return; + rxrpc_free_skb(skb, rxrpc_skb_rx_lost); + return 0; } } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); trace_rxrpc_rx_packet(sp); switch (sp->hdr.type) { @@ -1234,8 +1255,6 @@ void rxrpc_data_ready(struct sock *udp_sk) if (sp->hdr.serviceId == 0) goto bad_message; - rcu_read_lock(); - if (rxrpc_to_server(sp)) { /* Weed out packets to services we're not offering. Packets * that would begin a call are explicitly rejected and the rest @@ -1247,7 +1266,7 @@ void rxrpc_data_ready(struct sock *udp_sk) if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.seq == 1) goto unsupported_service; - goto discard_unlock; + goto discard; } } @@ -1257,17 +1276,23 @@ void rxrpc_data_ready(struct sock *udp_sk) goto wrong_security; if (sp->hdr.serviceId != conn->service_id) { - if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) || - conn->service_id != conn->params.service_id) + int old_id; + + if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) + goto reupgrade; + old_id = cmpxchg(&conn->service_id, conn->params.service_id, + sp->hdr.serviceId); + + if (old_id != conn->params.service_id && + old_id != sp->hdr.serviceId) goto reupgrade; - conn->service_id = sp->hdr.serviceId; } if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; + goto out; } /* Note the serial number skew here */ @@ -1286,19 +1311,19 @@ void rxrpc_data_ready(struct sock *udp_sk) /* Ignore really old calls */ if (sp->hdr.callNumber < chan->last_call) - goto discard_unlock; + goto discard; if (sp->hdr.callNumber == chan->last_call) { if (chan->call || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) - goto discard_unlock; + goto discard; /* For the previous service call, if completed * successfully, we discard all further packets. */ if (rxrpc_conn_is_service(conn) && chan->last_type == RXRPC_PACKET_TYPE_ACK) - goto discard_unlock; + goto discard; /* But otherwise we need to retransmit the final packet * from data cached in the connection record. @@ -1309,18 +1334,16 @@ void rxrpc_data_ready(struct sock *udp_sk) sp->hdr.serial, sp->hdr.flags, 0); rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; + goto out; } call = rcu_dereference(chan->call); if (sp->hdr.callNumber > chan->call_id) { - if (rxrpc_to_client(sp)) { - rcu_read_unlock(); + if (rxrpc_to_client(sp)) goto reject_packet; - } if (call) - rxrpc_input_implicit_end_call(conn, call); + rxrpc_input_implicit_end_call(rx, conn, call); call = NULL; } @@ -1337,55 +1360,42 @@ void rxrpc_data_ready(struct sock *udp_sk) if (!call || atomic_read(&call->usage) == 0) { if (rxrpc_to_client(sp) || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) - goto bad_message_unlock; + goto bad_message; if (sp->hdr.seq != 1) - goto discard_unlock; - call = rxrpc_new_incoming_call(local, rx, peer, conn, skb); - if (!call) { - rcu_read_unlock(); + goto discard; + call = rxrpc_new_incoming_call(local, rx, skb); + if (!call) goto reject_packet; - } rxrpc_send_ping(call, skb, skew); mutex_unlock(&call->user_mutex); } rxrpc_input_call_packet(call, skb, skew); - goto discard_unlock; + goto discard; -discard_unlock: - rcu_read_unlock(); discard: rxrpc_free_skb(skb, rxrpc_skb_rx_freed); out: trace_rxrpc_rx_done(0, 0); - return; - -out_unlock: - rcu_read_unlock(); - goto out; + return 0; wrong_security: - rcu_read_unlock(); trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RXKADINCONSISTENCY, EBADMSG); skb->priority = RXKADINCONSISTENCY; goto post_abort; unsupported_service: - rcu_read_unlock(); trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EOPNOTSUPP); skb->priority = RX_INVALID_OPERATION; goto post_abort; reupgrade: - rcu_read_unlock(); trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); goto protocol_error; -bad_message_unlock: - rcu_read_unlock(); bad_message: trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); @@ -1397,4 +1407,5 @@ reject_packet: trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); _leave(" [badmsg]"); + return 0; } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c @@ -19,6 +19,7 @@ #include <linux/ip.h> #include <linux/hashtable.h> #include <net/sock.h> +#include <net/udp.h> #include <net/af_rxrpc.h> #include "ar-internal.h" @@ -108,7 +109,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, */ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) { - struct sock *sock; + struct sock *usk; int ret, opt; _enter("%p{%d,%d}", @@ -122,6 +123,28 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) return ret; } + /* set the socket up */ + usk = local->socket->sk; + inet_sk(usk)->mc_loop = 0; + + /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ + inet_inc_convert_csum(usk); + + rcu_assign_sk_user_data(usk, local); + + udp_sk(usk)->encap_type = UDP_ENCAP_RXRPC; + udp_sk(usk)->encap_rcv = rxrpc_input_packet; + udp_sk(usk)->encap_destroy = NULL; + udp_sk(usk)->gro_receive = NULL; + udp_sk(usk)->gro_complete = NULL; + + udp_encap_enable(); +#if IS_ENABLED(CONFIG_IPV6) + if (local->srx.transport.family == AF_INET6) + udpv6_encap_enable(); +#endif + usk->sk_error_report = rxrpc_error_report; + /* if a local address was supplied then bind it */ if (local->srx.transport_len > sizeof(sa_family_t)) { _debug("bind"); @@ -191,11 +214,6 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) BUG(); } - /* set the socket up */ - sock = local->socket->sk; - sock->sk_user_data = local; - sock->sk_data_ready = rxrpc_data_ready; - sock->sk_error_report = rxrpc_error_report; _leave(" = 0"); return 0; diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c @@ -301,6 +301,8 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, if (rtt < 0) return; + spin_lock(&peer->rtt_input_lock); + /* Replace the oldest datum in the RTT buffer */ sum -= peer->rtt_cache[cursor]; sum += rtt; @@ -312,6 +314,8 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, peer->rtt_usage = usage; } + spin_unlock(&peer->rtt_input_lock); + /* Now recalculate the average */ if (usage == RXRPC_RTT_CACHE_SIZE) { avg = sum / RXRPC_RTT_CACHE_SIZE; @@ -320,6 +324,7 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, do_div(avg, usage); } + /* Don't need to update this under lock */ peer->rtt = avg; trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, usage, avg); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c @@ -153,8 +153,10 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, * assess the MTU size for the network interface through which this peer is * reached */ -static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) +static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx, + struct rxrpc_peer *peer) { + struct net *net = sock_net(&rx->sk); struct dst_entry *dst; struct rtable *rt; struct flowi fl; @@ -169,7 +171,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) switch (peer->srx.transport.family) { case AF_INET: rt = ip_route_output_ports( - &init_net, fl4, NULL, + net, fl4, NULL, peer->srx.transport.sin.sin_addr.s_addr, 0, htons(7000), htons(7001), IPPROTO_UDP, 0, 0); if (IS_ERR(rt)) { @@ -188,7 +190,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) sizeof(struct in6_addr)); fl6->fl6_dport = htons(7001); fl6->fl6_sport = htons(7000); - dst = ip6_route_output(&init_net, NULL, fl6); + dst = ip6_route_output(net, NULL, fl6); if (dst->error) { _leave(" [route err %d]", dst->error); return; @@ -223,6 +225,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); + spin_lock_init(&peer->rtt_input_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); if (RXRPC_TX_SMSS > 2190) @@ -240,10 +243,11 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) /* * Initialise peer record. */ -static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) +static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer, + unsigned long hash_key) { peer->hash_key = hash_key; - rxrpc_assess_MTU_size(peer); + rxrpc_assess_MTU_size(rx, peer); peer->mtu = peer->if_mtu; peer->rtt_last_req = ktime_get_real(); @@ -275,7 +279,8 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) /* * Set up a new peer. */ -static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, +static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, + struct rxrpc_local *local, struct sockaddr_rxrpc *srx, unsigned long hash_key, gfp_t gfp) @@ -287,7 +292,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, peer = rxrpc_alloc_peer(local, gfp); if (peer) { memcpy(&peer->srx, srx, sizeof(*srx)); - rxrpc_init_peer(peer, hash_key); + rxrpc_init_peer(rx, peer, hash_key); } _leave(" = %p", peer); @@ -299,14 +304,15 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, * since we've already done a search in the list from the non-reentrant context * (the data_ready handler) that is the only place we can add new peers. */ -void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) +void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local, + struct rxrpc_peer *peer) { struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key; hash_key = rxrpc_peer_hash_key(local, &peer->srx); peer->local = local; - rxrpc_init_peer(peer, hash_key); + rxrpc_init_peer(rx, peer, hash_key); spin_lock(&rxnet->peer_hash_lock); hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); @@ -317,7 +323,8 @@ void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) /* * obtain a remote transport endpoint for the specified address */ -struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, + struct rxrpc_local *local, struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_peer *peer, *candidate; @@ -337,7 +344,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, /* The peer is not yet present in hash - create a candidate * for a new record and then redo the search. */ - candidate = rxrpc_create_peer(local, srx, hash_key, gfp); + candidate = rxrpc_create_peer(rx, local, srx, hash_key, gfp); if (!candidate) { _leave(" = NULL [nomem]"); return NULL; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c @@ -398,6 +398,7 @@ static int u32_init(struct tcf_proto *tp) rcu_assign_pointer(tp_c->hlist, root_ht); root_ht->tp_c = tp_c; + root_ht->refcnt++; rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; @@ -610,7 +611,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; - WARN_ON(ht->refcnt); + WARN_ON(--ht->refcnt); u32_clear_hnode(tp, ht, extack); @@ -649,7 +650,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) WARN_ON(root_ht == NULL); - if (root_ht && --root_ht->refcnt == 0) + if (root_ht && --root_ht->refcnt == 1) u32_destroy_hnode(tp, root_ht, extack); if (--tp_c->refcnt == 0) { @@ -698,7 +699,6 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, } if (ht->refcnt == 1) { - ht->refcnt--; u32_destroy_hnode(tp, ht, extack); } else { NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); @@ -708,11 +708,11 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, out: *last = true; if (root_ht) { - if (root_ht->refcnt > 1) { + if (root_ht->refcnt > 2) { *last = false; goto ret; } - if (root_ht->refcnt == 1) { + if (root_ht->refcnt == 2) { if (!ht_empty(root_ht)) { *last = false; goto ret; diff --git a/net/tipc/link.c b/net/tipc/link.c @@ -477,6 +477,8 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, l->in_session = false; l->bearer_id = bearer_id; l->tolerance = tolerance; + if (bc_rcvlink) + bc_rcvlink->tolerance = tolerance; l->net_plane = net_plane; l->advertised_mtu = mtu; l->mtu = mtu; @@ -843,14 +845,21 @@ static void link_prepare_wakeup(struct tipc_link *l) void tipc_link_reset(struct tipc_link *l) { + struct sk_buff_head list; + + __skb_queue_head_init(&list); + l->in_session = false; l->session++; l->mtu = l->advertised_mtu; + spin_lock_bh(&l->wakeupq.lock); + skb_queue_splice_init(&l->wakeupq, &list); + spin_unlock_bh(&l->wakeupq.lock); + spin_lock_bh(&l->inputq->lock); - skb_queue_splice_init(&l->wakeupq, l->inputq); + skb_queue_splice_init(&list, l->inputq); spin_unlock_bh(&l->inputq->lock); - spin_unlock_bh(&l->wakeupq.lock); __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); @@ -1031,7 +1040,7 @@ static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r, /* Detect repeated retransmit failures on same packet */ if (r->last_retransm != buf_seqno(skb)) { r->last_retransm = buf_seqno(skb); - r->stale_limit = jiffies + msecs_to_jiffies(l->tolerance); + r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance); } else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) { link_retransmit_failure(l, skb); if (link_is_bc_sndlink(l)) @@ -1576,9 +1585,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, strncpy(if_name, data, TIPC_MAX_IF_NAME); /* Update own tolerance if peer indicates a non-zero value */ - if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) + if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; - + l->bc_rcvlink->tolerance = peers_tol; + } /* Update own priority if peer's priority is higher */ if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) l->priority = peers_prio; @@ -1604,9 +1614,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->rcv_nxt_state = msg_seqno(hdr) + 1; /* Update own tolerance if peer indicates a non-zero value */ - if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) + if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; - + l->bc_rcvlink->tolerance = peers_tol; + } /* Update own prio if peer indicates a different value */ if ((peers_prio != l->priority) && in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { @@ -2223,6 +2234,8 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, struct sk_buff_head *xmitq) { l->tolerance = tol; + if (l->bc_rcvlink) + l->bc_rcvlink->tolerance = tol; if (link_is_up(l)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } diff --git a/net/tipc/socket.c b/net/tipc/socket.c @@ -1196,6 +1196,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, * @skb: pointer to message buffer. */ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); @@ -1213,7 +1214,16 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), tsk_peer_port(tsk)); sk->sk_state_change(sk); - goto exit; + + /* State change is ignored if socket already awake, + * - convert msg to abort msg and add to inqueue + */ + msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE); + msg_set_type(hdr, TIPC_CONN_MSG); + msg_set_size(hdr, BASIC_H_SIZE); + msg_set_hdr_sz(hdr, BASIC_H_SIZE); + __skb_queue_tail(inputq, skb); + return; } tsk->probe_unacked = false; @@ -1936,7 +1946,7 @@ static void tipc_sk_proto_rcv(struct sock *sk, switch (msg_user(hdr)) { case CONN_MANAGER: - tipc_sk_conn_proto_rcv(tsk, skb, xmitq); + tipc_sk_conn_proto_rcv(tsk, skb, inputq, xmitq); return; case SOCK_WAKEUP: tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # This test is for checking rtnetlink callpaths, and get as much coverage as possible. # diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # Run a series of udpgso benchmarks