whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 7d0ae236ed13d7645fb73b85e7c95deee46c4656
parent bb617b9b4519b0cef939c9c8e9c41470749f0d51
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Mon, 21 Jan 2019 12:52:31 +1300

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) Fix endless loop in nf_tables, from Phil Sutter.

 2) Fix cross namespace ip6_gre tunnel hash list corruption, from
    Olivier Matz.

 3) Don't be too strict in phy_start_aneg() otherwise we might not allow
    restarting auto negotiation. From Heiner Kallweit.

 4) Fix various KMSAN uninitialized value cases in tipc, from Ying Xue.

 5) Memory leak in act_tunnel_key, from Davide Caratti.

 6) Handle chip errata of mv88e6390 PHY, from Andrew Lunn.

 7) Remove linear SKB assumption in fou/fou6, from Eric Dumazet.

 8) Missing udplite rehash callbacks, from Alexey Kodanev.

 9) Log dirty pages properly in vhost, from Jason Wang.

10) Use consume_skb() in neigh_probe() as this is a normal free not a
    drop, from Yang Wei. Likewise in macvlan_process_broadcast().

11) Missing device_del() in mdiobus_register() error paths, from Thomas
    Petazzoni.

12) Fix checksum handling of short packets in mlx5, from Cong Wang.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (96 commits)
  bpf: in __bpf_redirect_no_mac pull mac only if present
  virtio_net: bulk free tx skbs
  net: phy: phy driver features are mandatory
  isdn: avm: Fix string plus integer warning from Clang
  net/mlx5e: Fix cb_ident duplicate in indirect block register
  net/mlx5e: Fix wrong (zero) TX drop counter indication for representor
  net/mlx5e: Fix wrong error code return on FEC query failure
  net/mlx5e: Force CHECKSUM_UNNECESSARY for short ethernet frames
  tools: bpftool: Cleanup license mess
  bpf: fix inner map masking to prevent oob under speculation
  bpf: pull in pkt_sched.h header for tooling to fix bpftool build
  selftests: forwarding: Add a test case for externally learned FDB entries
  selftests: mlxsw: Test FDB offload indication
  mlxsw: spectrum_switchdev: Do not treat static FDB entries as sticky
  net: bridge: Mark FDB entries that were added by user as such
  mlxsw: spectrum_fid: Update dummy FID index
  mlxsw: pci: Return error on PCI reset timeout
  mlxsw: pci: Increase PCI SW reset timeout
  mlxsw: pci: Ring CQ's doorbell before RDQ's
  MAINTAINERS: update email addresses of liquidio driver maintainers
  ...

Diffstat:
MDocumentation/networking/index.rst | 26+++++++++++++-------------
MDocumentation/networking/rxrpc.txt | 45---------------------------------------------
MDocumentation/networking/snmp_counter.rst | 130++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
MDocumentation/networking/timestamping.txt | 4++--
MMAINTAINERS | 7+++----
Mdrivers/atm/he.c | 2+-
Mdrivers/isdn/hardware/avm/b1.c | 2+-
Mdrivers/net/Kconfig | 2+-
Mdrivers/net/dsa/realtek-smi.c | 18+++++++++++++-----
Mdrivers/net/ethernet/amd/xgbe/xgbe-common.h | 2--
Mdrivers/net/ethernet/amd/xgbe/xgbe-dev.c | 22++++++++++++++++------
Mdrivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 6++++++
Mdrivers/net/ethernet/intel/igb/igb.h | 2+-
Mdrivers/net/ethernet/intel/igb/igb_ethtool.c | 4++--
Mdrivers/net/ethernet/intel/igb/igb_main.c | 14+++++++-------
Mdrivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 5++++-
Mdrivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 30+++++++++++++++++-------------
Mdrivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 13+++++++++++++
Mdrivers/net/ethernet/mellanox/mlxsw/pci.c | 16+++++++++-------
Mdrivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 3++-
Mdrivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c | 4++--
Mdrivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 12++++++------
Mdrivers/net/macvlan.c | 2+-
Mdrivers/net/phy/bcm87xx.c | 2++
Mdrivers/net/phy/cortina.c | 1+
Mdrivers/net/phy/marvell.c | 37+++++++++++++++++++++++++++++++++++--
Mdrivers/net/phy/mdio_bus.c | 1+
Mdrivers/net/phy/meson-gxl.c | 1+
Mdrivers/net/phy/micrel.c | 2++
Mdrivers/net/phy/phy.c | 19+++++++------------
Mdrivers/net/phy/phy_device.c | 17+++++++++++++++++
Mdrivers/net/phy/teranetics.c | 1+
Mdrivers/net/ppp/pppoe.c | 1+
Mdrivers/net/usb/aqc111.c | 15+++++++++++++++
Mdrivers/net/usb/cdc_ether.c | 8++++++++
Mdrivers/net/virtio_net.c | 12++++++------
Mdrivers/vhost/net.c | 3++-
Mdrivers/vhost/vhost.c | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
Mdrivers/vhost/vhost.h | 3++-
Minclude/linux/phy.h | 6++++--
Minclude/linux/skbuff.h | 1+
Minclude/net/af_rxrpc.h | 16----------------
Minclude/net/ip_fib.h | 2+-
Minclude/net/netfilter/nf_flow_table.h | 1-
Minclude/uapi/linux/in.h | 2+-
Mkernel/bpf/btf.c | 2+-
Mkernel/bpf/cgroup.c | 1+
Mkernel/bpf/map_in_map.c | 17+++++++++++++++--
Mkernel/bpf/stackmap.c | 9+++++++--
Mnet/bpfilter/bpfilter_umh_blob.S | 2+-
Mnet/bridge/br_fdb.c | 5+++++
Mnet/bridge/br_forward.c | 9++++-----
Mnet/bridge/br_netfilter_ipv6.c | 1+
Mnet/bridge/netfilter/ebtables.c | 6++++--
Mnet/bridge/netfilter/nft_reject_bridge.c | 1+
Mnet/core/filter.c | 32++++++++++++++++++++------------
Mnet/core/lwt_bpf.c | 1+
Mnet/core/neighbour.c | 4++--
Mnet/ipv4/fib_frontend.c | 4++--
Mnet/ipv4/fib_trie.c | 15++++++++++++---
Mnet/ipv4/fou.c | 9+++++++--
Mnet/ipv4/ip_gre.c | 23++++++++++++++---------
Mnet/ipv4/ip_input.c | 1+
Mnet/ipv4/tcp.c | 2+-
Mnet/ipv4/udp.c | 18+++++++++++++-----
Mnet/ipv4/udp_impl.h | 1+
Mnet/ipv4/udplite.c | 1+
Mnet/ipv6/fou6.c | 9+++++++--
Mnet/ipv6/ip6_gre.c | 14++++++++++----
Mnet/ipv6/route.c | 14++------------
Mnet/ipv6/udp.c | 18+++++++++++++-----
Mnet/ipv6/udp_impl.h | 1+
Mnet/ipv6/udplite.c | 1+
Mnet/netfilter/nf_flow_table_core.c | 5+++--
Mnet/netfilter/nf_tables_api.c | 14+++++++-------
Mnet/netfilter/nft_flow_offload.c | 13++++++++-----
Mnet/openvswitch/flow_netlink.c | 2+-
Mnet/packet/af_packet.c | 3++-
Mnet/rxrpc/af_rxrpc.c | 70----------------------------------------------------------------------
Mnet/rxrpc/ar-internal.h | 19++++++++++++-------
Mnet/rxrpc/call_object.c | 97-------------------------------------------------------------------------------
Mnet/rxrpc/conn_client.c | 5+----
Mnet/rxrpc/sendmsg.c | 24+++++++++++-------------
Mnet/sched/act_tunnel_key.c | 19+++++++++++--------
Mnet/sched/cls_api.c | 3+--
Mnet/sched/cls_flower.c | 19++++++++++++++-----
Mnet/sched/sch_cake.c | 5+++--
Mnet/sched/sch_cbs.c | 3++-
Mnet/sched/sch_drr.c | 7+++++--
Mnet/sched/sch_dsmark.c | 3++-
Mnet/sched/sch_hfsc.c | 9+++++----
Mnet/sched/sch_htb.c | 3++-
Mnet/sched/sch_prio.c | 3++-
Mnet/sched/sch_qfq.c | 20++++++++++++--------
Mnet/sched/sch_tbf.c | 3++-
Mnet/sctp/ipv6.c | 5+----
Mnet/sctp/protocol.c | 4+---
Mnet/tipc/netlink_compat.c | 50+++++++++++++++++++++++++++++++++++++++++++++++++-
Mnet/tipc/topsrv.c | 2+-
Mnet/xdp/xdp_umem.c | 16+++++++++++++---
Msamples/bpf/Makefile | 1+
Asamples/bpf/asm_goto_workaround.h | 16++++++++++++++++
Mtools/bpf/bpftool/Makefile | 9++++++++-
Mtools/bpf/bpftool/json_writer.c | 7+------
Mtools/bpf/bpftool/json_writer.h | 5-----
Atools/include/uapi/linux/pkt_sched.h | 1163+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtools/lib/bpf/bpf.c | 19+++++++++++++++----
Mtools/testing/selftests/bpf/Makefile | 1+
Mtools/testing/selftests/bpf/test_progs.c | 30++++++++++++++++++++++++++++++
Mtools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh | 20++++++++++++++++++++
Mtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh | 34+++++++++++++++++++++++++++++++++-
Mtools/testing/selftests/networking/timestamping/txtimestamp.c | 2+-
Mtools/testing/selftests/tc-testing/tc-tests/actions/ife.json | 88++++++++++++++++++++++++++++++++++++++++----------------------------------------
Mtools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json | 31+------------------------------
114 files changed, 2090 insertions(+), 602 deletions(-)

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst @@ -11,19 +11,19 @@ Contents: batman-adv can can_ucan_protocol - dpaa2/index - e100 - e1000 - e1000e - fm10k - igb - igbvf - ixgb - ixgbe - ixgbevf - i40e - iavf - ice + device_drivers/freescale/dpaa2/index + device_drivers/intel/e100 + device_drivers/intel/e1000 + device_drivers/intel/e1000e + device_drivers/intel/fm10k + device_drivers/intel/igb + device_drivers/intel/igbvf + device_drivers/intel/ixgb + device_drivers/intel/ixgbe + device_drivers/intel/ixgbevf + device_drivers/intel/i40e + device_drivers/intel/iavf + device_drivers/intel/ice kapi z8530book msg_zerocopy diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt @@ -1000,51 +1000,6 @@ The kernel interface functions are as follows: size should be set when the call is begun. tx_total_len may not be less than zero. - (*) Check to see the completion state of a call so that the caller can assess - whether it needs to be retried. - - enum rxrpc_call_completion { - RXRPC_CALL_SUCCEEDED, - RXRPC_CALL_REMOTELY_ABORTED, - RXRPC_CALL_LOCALLY_ABORTED, - RXRPC_CALL_LOCAL_ERROR, - RXRPC_CALL_NETWORK_ERROR, - }; - - int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call, - enum rxrpc_call_completion *_compl, - u32 *_abort_code); - - On return, -EINPROGRESS will be returned if the call is still ongoing; if - it is finished, *_compl will be set to indicate the manner of completion, - *_abort_code will be set to any abort code that occurred. 0 will be - returned on a successful completion, -ECONNABORTED will be returned if the - client failed due to a remote abort and anything else will return an - appropriate error code. - - The caller should look at this information to decide if it's worth - retrying the call. - - (*) Retry a client call. - - int rxrpc_kernel_retry_call(struct socket *sock, - struct rxrpc_call *call, - struct sockaddr_rxrpc *srx, - struct key *key); - - This attempts to partially reinitialise a call and submit it again while - reusing the original call's Tx queue to avoid the need to repackage and - re-encrypt the data to be sent. call indicates the call to retry, srx the - new address to send it to and key the encryption key to use for signing or - encrypting the packets. - - For this to work, the first Tx data packet must still be in the transmit - queue, and currently this is only permitted for local and network errors - and the call must not have been aborted. Any partially constructed Tx - packet is left as is and can continue being filled afterwards. - - It returns 0 if the call was requeued and an error otherwise. - (*) Get call RTT. u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call); diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst @@ -336,7 +336,26 @@ time client replies ACK, this socket will get another chance to move to the accept queue. -TCP Fast Open +* TcpEstabResets +Defined in `RFC1213 tcpEstabResets`_. + +.. _RFC1213 tcpEstabResets: https://tools.ietf.org/html/rfc1213#page-48 + +* TcpAttemptFails +Defined in `RFC1213 tcpAttemptFails`_. + +.. _RFC1213 tcpAttemptFails: https://tools.ietf.org/html/rfc1213#page-48 + +* TcpOutRsts +Defined in `RFC1213 tcpOutRsts`_. The RFC says this counter indicates +the 'segments sent containing the RST flag', but in linux kernel, this +couner indicates the segments kerenl tried to send. The sending +process might be failed due to some errors (e.g. memory alloc failed). + +.. _RFC1213 tcpOutRsts: https://tools.ietf.org/html/rfc1213#page-52 + + +TCP Fast Path ============ When kernel receives a TCP packet, it has two paths to handler the packet, one is fast path, another is slow path. The comment in kernel @@ -383,8 +402,6 @@ increase 1. TCP abort ======== - - * TcpExtTCPAbortOnData It means TCP layer has data in flight, but need to close the connection. So TCP layer sends a RST to the other side, indicate the @@ -545,7 +562,6 @@ packet yet, the sender would know packet 4 is out of order. The TCP stack of kernel will increase TcpExtTCPSACKReorder for both of the above scenarios. - DSACK ===== The DSACK is defined in `RFC2883`_. The receiver uses DSACK to report @@ -566,13 +582,63 @@ The TCP stack receives an out of order duplicate packet, so it sends a DSACK to the sender. * TcpExtTCPDSACKRecv -The TCP stack receives a DSACK, which indicate an acknowledged +The TCP stack receives a DSACK, which indicates an acknowledged duplicate packet is received. * TcpExtTCPDSACKOfoRecv The TCP stack receives a DSACK, which indicate an out of order duplicate packet is received. +invalid SACK and DSACK +==================== +When a SACK (or DSACK) block is invalid, a corresponding counter would +be updated. The validation method is base on the start/end sequence +number of the SACK block. For more details, please refer the comment +of the function tcp_is_sackblock_valid in the kernel source code. A +SACK option could have up to 4 blocks, they are checked +individually. E.g., if 3 blocks of a SACk is invalid, the +corresponding counter would be updated 3 times. The comment of the +`Add counters for discarded SACK blocks`_ patch has additional +explaination: + +.. _Add counters for discarded SACK blocks: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=18f02545a9a16c9a89778b91a162ad16d510bb32 + +* TcpExtTCPSACKDiscard +This counter indicates how many SACK blocks are invalid. If the invalid +SACK block is caused by ACK recording, the TCP stack will only ignore +it and won't update this counter. + +* TcpExtTCPDSACKIgnoredOld and TcpExtTCPDSACKIgnoredNoUndo +When a DSACK block is invalid, one of these two counters would be +updated. Which counter will be updated depends on the undo_marker flag +of the TCP socket. If the undo_marker is not set, the TCP stack isn't +likely to re-transmit any packets, and we still receive an invalid +DSACK block, the reason might be that the packet is duplicated in the +middle of the network. In such scenario, TcpExtTCPDSACKIgnoredNoUndo +will be updated. If the undo_marker is set, TcpExtTCPDSACKIgnoredOld +will be updated. As implied in its name, it might be an old packet. + +SACK shift +========= +The linux networking stack stores data in sk_buff struct (skb for +short). If a SACK block acrosses multiple skb, the TCP stack will try +to re-arrange data in these skb. E.g. if a SACK block acknowledges seq +10 to 15, skb1 has seq 10 to 13, skb2 has seq 14 to 20. The seq 14 and +15 in skb2 would be moved to skb1. This operation is 'shift'. If a +SACK block acknowledges seq 10 to 20, skb1 has seq 10 to 13, skb2 has +seq 14 to 20. All data in skb2 will be moved to skb1, and skb2 will be +discard, this operation is 'merge'. + +* TcpExtTCPSackShifted +A skb is shifted + +* TcpExtTCPSackMerged +A skb is merged + +* TcpExtTCPSackShiftFallback +A skb should be shifted or merged, but the TCP stack doesn't do it for +some reasons. + TCP out of order =============== * TcpExtTCPOFOQueue @@ -662,6 +728,60 @@ unacknowledged number (more strict than `RFC 5961 section 5.2`_). .. _RFC 5961 section 4.2: https://tools.ietf.org/html/rfc5961#page-9 .. _RFC 5961 section 5.2: https://tools.ietf.org/html/rfc5961#page-11 +TCP receive window +================= +* TcpExtTCPWantZeroWindowAdv +Depending on current memory usage, the TCP stack tries to set receive +window to zero. But the receive window might still be a no-zero +value. For example, if the previous window size is 10, and the TCP +stack receives 3 bytes, the current window size would be 7 even if the +window size calculated by the memory usage is zero. + +* TcpExtTCPToZeroWindowAdv +The TCP receive window is set to zero from a no-zero value. + +* TcpExtTCPFromZeroWindowAdv +The TCP receive window is set to no-zero value from zero. + + +Delayed ACK +========== +The TCP Delayed ACK is a technique which is used for reducing the +packet count in the network. For more details, please refer the +`Delayed ACK wiki`_ + +.. _Delayed ACK wiki: https://en.wikipedia.org/wiki/TCP_delayed_acknowledgment + +* TcpExtDelayedACKs +A delayed ACK timer expires. The TCP stack will send a pure ACK packet +and exit the delayed ACK mode. + +* TcpExtDelayedACKLocked +A delayed ACK timer expires, but the TCP stack can't send an ACK +immediately due to the socket is locked by a userspace program. The +TCP stack will send a pure ACK later (after the userspace program +unlock the socket). When the TCP stack sends the pure ACK later, the +TCP stack will also update TcpExtDelayedACKs and exit the delayed ACK +mode. + +* TcpExtDelayedACKLost +It will be updated when the TCP stack receives a packet which has been +ACKed. A Delayed ACK loss might cause this issue, but it would also be +triggered by other reasons, such as a packet is duplicated in the +network. + +Tail Loss Probe (TLP) +=================== +TLP is an algorithm which is used to detect TCP packet loss. For more +details, please refer the `TLP paper`_. + +.. _TLP paper: https://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01 + +* TcpExtTCPLossProbes +A TLP probe packet is sent. + +* TcpExtTCPLossProbeRecovery +A packet loss is detected and recovered by TLP. examples ======= diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt @@ -417,7 +417,7 @@ is again deprecated and ts[2] holds a hardware timestamp if set. Hardware time stamping must also be initialized for each device driver that is expected to do hardware time stamping. The parameter is defined in -/include/linux/net_tstamp.h as: +include/uapi/linux/net_tstamp.h as: struct hwtstamp_config { int flags; /* no flags defined right now, must be zero */ @@ -487,7 +487,7 @@ enum { HWTSTAMP_FILTER_PTP_V1_L4_EVENT, /* for the complete list of values, please check - * the include file /include/linux/net_tstamp.h + * the include file include/uapi/linux/net_tstamp.h */ }; diff --git a/MAINTAINERS b/MAINTAINERS @@ -3471,10 +3471,9 @@ F: drivers/i2c/busses/i2c-octeon* F: drivers/i2c/busses/i2c-thunderx* CAVIUM LIQUIDIO NETWORK DRIVER -M: Derek Chickles <derek.chickles@caviumnetworks.com> -M: Satanand Burla <satananda.burla@caviumnetworks.com> -M: Felix Manlunas <felix.manlunas@caviumnetworks.com> -M: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com> +M: Derek Chickles <dchickles@marvell.com> +M: Satanand Burla <sburla@marvell.com> +M: Felix Manlunas <fmanlunas@marvell.com> L: netdev@vger.kernel.org W: http://www.cavium.com S: Supported diff --git a/drivers/atm/he.c b/drivers/atm/he.c @@ -718,7 +718,7 @@ static int he_init_cs_block_rcm(struct he_dev *he_dev) instead of '/ 512', use '>> 9' to prevent a call to divdu3 on x86 platforms */ - rate_cps = (unsigned long long) (1 << exp) * (man + 512) >> 9; + rate_cps = (unsigned long long) (1UL << exp) * (man + 512) >> 9; if (rate_cps < 10) rate_cps = 10; /* 2.2.1 minimum payload rate is 10 cps */ diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c @@ -423,7 +423,7 @@ void b1_parse_version(avmctrl_info *cinfo) int i, j; for (j = 0; j < AVM_MAXVERSION; j++) - cinfo->version[j] = "\0\0" + 1; + cinfo->version[j] = ""; for (i = 0, j = 0; j < AVM_MAXVERSION && i < cinfo->versionlen; j++, i += cinfo->versionbuf[i] + 1) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig @@ -519,7 +519,7 @@ config NET_FAILOVER and destroy a failover master netdev and manages a primary and standby slave netdevs that get registered via the generic failover infrastructure. This can be used by paravirtual drivers to enable - an alternate low latency datapath. It alsoenables live migration of + an alternate low latency datapath. It also enables live migration of a VM with direct attached VF by failing over to the paravirtual datapath when the VF is unplugged. diff --git a/drivers/net/dsa/realtek-smi.c b/drivers/net/dsa/realtek-smi.c @@ -347,16 +347,17 @@ int realtek_smi_setup_mdio(struct realtek_smi *smi) struct device_node *mdio_np; int ret; - mdio_np = of_find_compatible_node(smi->dev->of_node, NULL, - "realtek,smi-mdio"); + mdio_np = of_get_compatible_child(smi->dev->of_node, "realtek,smi-mdio"); if (!mdio_np) { dev_err(smi->dev, "no MDIO bus node\n"); return -ENODEV; } smi->slave_mii_bus = devm_mdiobus_alloc(smi->dev); - if (!smi->slave_mii_bus) - return -ENOMEM; + if (!smi->slave_mii_bus) { + ret = -ENOMEM; + goto err_put_node; + } smi->slave_mii_bus->priv = smi; smi->slave_mii_bus->name = "SMI slave MII"; smi->slave_mii_bus->read = realtek_smi_mdio_read; @@ -371,10 +372,15 @@ int realtek_smi_setup_mdio(struct realtek_smi *smi) if (ret) { dev_err(smi->dev, "unable to register MDIO bus %s\n", smi->slave_mii_bus->id); - of_node_put(mdio_np); + goto err_put_node; } return 0; + +err_put_node: + of_node_put(mdio_np); + + return ret; } static int realtek_smi_probe(struct platform_device *pdev) @@ -457,6 +463,8 @@ static int realtek_smi_remove(struct platform_device *pdev) struct realtek_smi *smi = dev_get_drvdata(&pdev->dev); dsa_unregister_switch(smi->ds); + if (smi->slave_mii_bus) + of_node_put(smi->slave_mii_bus->dev.of_node); gpiod_set_value(smi->reset, 1); return 0; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -431,8 +431,6 @@ #define MAC_MDIOSCAR_PA_WIDTH 5 #define MAC_MDIOSCAR_RA_INDEX 0 #define MAC_MDIOSCAR_RA_WIDTH 16 -#define MAC_MDIOSCAR_REG_INDEX 0 -#define MAC_MDIOSCAR_REG_WIDTH 21 #define MAC_MDIOSCCDR_BUSY_INDEX 22 #define MAC_MDIOSCCDR_BUSY_WIDTH 1 #define MAC_MDIOSCCDR_CMD_INDEX 16 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1284,6 +1284,20 @@ static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, } } +static unsigned int xgbe_create_mdio_sca(int port, int reg) +{ + unsigned int mdio_sca, da; + + da = (reg & MII_ADDR_C45) ? reg >> 16 : 0; + + mdio_sca = 0; + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, RA, reg); + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, PA, port); + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, da); + + return mdio_sca; +} + static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, int reg, u16 val) { @@ -1291,9 +1305,7 @@ static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, reinit_completion(&pdata->mdio_complete); - mdio_sca = 0; - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); + mdio_sca = xgbe_create_mdio_sca(addr, reg); XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); mdio_sccd = 0; @@ -1317,9 +1329,7 @@ static int xgbe_read_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, reinit_completion(&pdata->mdio_complete); - mdio_sca = 0; - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); + mdio_sca = xgbe_create_mdio_sca(addr, reg); XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); mdio_sccd = 0; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2051,6 +2051,7 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) bool nonlinear = skb_is_nonlinear(skb); struct rtnl_link_stats64 *percpu_stats; struct dpaa_percpu_priv *percpu_priv; + struct netdev_queue *txq; struct dpaa_priv *priv; struct qm_fd fd; int offset = 0; @@ -2100,6 +2101,11 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) if (unlikely(err < 0)) goto skb_to_fd_failed; + txq = netdev_get_tx_queue(net_dev, queue_mapping); + + /* LLTX requires to do our own update of trans_start */ + txq->trans_start = jiffies; + if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { fd.cmd |= cpu_to_be32(FM_FD_CMD_UPD); skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h @@ -515,7 +515,7 @@ struct igb_adapter { /* OS defined structs */ struct pci_dev *pdev; - struct mutex stats64_lock; + spinlock_t stats64_lock; struct rtnl_link_stats64 stats64; /* structs defined in e1000_hw.h */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2295,7 +2295,7 @@ static void igb_get_ethtool_stats(struct net_device *netdev, int i, j; char *p; - mutex_lock(&adapter->stats64_lock); + spin_lock(&adapter->stats64_lock); igb_update_stats(adapter); for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) { @@ -2338,7 +2338,7 @@ static void igb_get_ethtool_stats(struct net_device *netdev, } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); i += IGB_RX_QUEUE_STATS_LEN; } - mutex_unlock(&adapter->stats64_lock); + spin_unlock(&adapter->stats64_lock); } static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2203,9 +2203,9 @@ void igb_down(struct igb_adapter *adapter) del_timer_sync(&adapter->phy_info_timer); /* record the stats before reset*/ - mutex_lock(&adapter->stats64_lock); + spin_lock(&adapter->stats64_lock); igb_update_stats(adapter); - mutex_unlock(&adapter->stats64_lock); + spin_unlock(&adapter->stats64_lock); adapter->link_speed = 0; adapter->link_duplex = 0; @@ -3840,7 +3840,7 @@ static int igb_sw_init(struct igb_adapter *adapter) adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; spin_lock_init(&adapter->nfc_lock); - mutex_init(&adapter->stats64_lock); + spin_lock_init(&adapter->stats64_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { case e1000_82576: @@ -5406,9 +5406,9 @@ no_wait: } } - mutex_lock(&adapter->stats64_lock); + spin_lock(&adapter->stats64_lock); igb_update_stats(adapter); - mutex_unlock(&adapter->stats64_lock); + spin_unlock(&adapter->stats64_lock); for (i = 0; i < adapter->num_tx_queues; i++) { struct igb_ring *tx_ring = adapter->tx_ring[i]; @@ -6235,10 +6235,10 @@ static void igb_get_stats64(struct net_device *netdev, { struct igb_adapter *adapter = netdev_priv(netdev); - mutex_lock(&adapter->stats64_lock); + spin_lock(&adapter->stats64_lock); igb_update_stats(adapter); memcpy(stats, &adapter->stats64, sizeof(*stats)); - mutex_unlock(&adapter->stats64_lock); + spin_unlock(&adapter->stats64_lock); } /** diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -844,9 +844,12 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Autoneg); - if (get_fec_supported_advertised(mdev, link_ksettings)) + err = get_fec_supported_advertised(mdev, link_ksettings); + if (err) { netdev_dbg(priv->netdev, "%s: FEC caps query failed: %d\n", __func__, err); + err = 0; /* don't fail caps query because of FEC error */ + } if (!an_disable_admin) ethtool_link_ksettings_add_link_mode(link_ksettings, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -58,7 +58,8 @@ struct mlx5e_rep_indr_block_priv { struct list_head list; }; -static void mlx5e_rep_indr_unregister_block(struct net_device *netdev); +static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev); static void mlx5e_rep_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) @@ -179,6 +180,7 @@ static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; + s->tx_queue_dropped += sq_stats->dropped; } } } @@ -663,7 +665,7 @@ static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list; list_for_each_entry_safe(cb_priv, temp, head, list) { - mlx5e_rep_indr_unregister_block(cb_priv->netdev); + mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev); kfree(cb_priv); } } @@ -735,7 +737,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, err = tcf_block_cb_register(f->block, mlx5e_rep_indr_setup_block_cb, - netdev, indr_priv, f->extack); + indr_priv, indr_priv, f->extack); if (err) { list_del(&indr_priv->list); kfree(indr_priv); @@ -743,14 +745,15 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, return err; case TC_BLOCK_UNBIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + if (!indr_priv) + return -ENOENT; + tcf_block_cb_unregister(f->block, mlx5e_rep_indr_setup_block_cb, - netdev); - indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); - if (indr_priv) { - list_del(&indr_priv->list); - kfree(indr_priv); - } + indr_priv); + list_del(&indr_priv->list); + kfree(indr_priv); return 0; default: @@ -779,7 +782,7 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, err = __tc_indr_block_cb_register(netdev, rpriv, mlx5e_rep_indr_setup_tc_cb, - netdev); + rpriv); if (err) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); @@ -789,10 +792,11 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, return err; } -static void mlx5e_rep_indr_unregister_block(struct net_device *netdev) +static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) { __tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, - netdev); + rpriv); } static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, @@ -811,7 +815,7 @@ static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, mlx5e_rep_indr_register_block(rpriv, netdev); break; case NETDEV_UNREGISTER: - mlx5e_rep_indr_unregister_block(netdev); + mlx5e_rep_indr_unregister_block(rpriv, netdev); break; } return NOTIFY_OK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -732,6 +732,8 @@ static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) ((struct ipv6hdr *)ip_p)->nexthdr; } +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + static inline void mlx5e_handle_csum(struct net_device *netdev, struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, @@ -754,6 +756,17 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))) goto csum_unnecessary; + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to be + * CHECKSUM_UNNECESSARY even if they are not padded. + */ + if (short_frame(skb->len)) + goto csum_unnecessary; + if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP)) goto csum_unnecessary; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -604,29 +604,31 @@ static void mlxsw_pci_cq_tasklet(unsigned long data) u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe); u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe); + char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; + + memcpy(ncqe, cqe, q->elem_size); + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); if (sendq) { struct mlxsw_pci_queue *sdq; sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, - wqe_counter, cqe); + wqe_counter, ncqe); q->u.cq.comp_sdq_count++; } else { struct mlxsw_pci_queue *rdq; rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, - wqe_counter, q->u.cq.v, cqe); + wqe_counter, q->u.cq.v, ncqe); q->u.cq.comp_rdq_count++; } if (++items == credits) break; } - if (items) { - mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + if (items) mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); - } } static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q) @@ -1365,10 +1367,10 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY); if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC) - break; + return 0; cond_resched(); } while (time_before(jiffies, end)); - return 0; + return -EBUSY; } static int mlxsw_pci_alloc_irq_vectors(struct mlxsw_pci *mlxsw_pci) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -27,7 +27,7 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) -#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000 +#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 13000 #define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF @@ -53,6 +53,7 @@ #define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ #define MLXSW_PCI_CQE01_SIZE 16 /* 16 bytes per element */ #define MLXSW_PCI_CQE2_SIZE 32 /* 32 bytes per element */ +#define MLXSW_PCI_CQE_SIZE_MAX MLXSW_PCI_CQE2_SIZE #define MLXSW_PCI_EQE_SIZE 16 /* 16 bytes per element */ #define MLXSW_PCI_WQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE) #define MLXSW_PCI_CQE01_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE01_SIZE) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -997,8 +997,8 @@ static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_dummy_ops = { static const struct mlxsw_sp_fid_family mlxsw_sp_fid_dummy_family = { .type = MLXSW_SP_FID_TYPE_DUMMY, .fid_size = sizeof(struct mlxsw_sp_fid), - .start_index = MLXSW_SP_RFID_BASE - 1, - .end_index = MLXSW_SP_RFID_BASE - 1, + .start_index = VLAN_N_VID - 1, + .end_index = VLAN_N_VID - 1, .ops = &mlxsw_sp_fid_dummy_ops, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1233,7 +1233,7 @@ mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic) { return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : - MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; + MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_MLAG; } static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) @@ -1290,7 +1290,7 @@ out: static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, const char *mac, u16 fid, bool adding, enum mlxsw_reg_sfd_rec_action action, - bool dynamic) + enum mlxsw_reg_sfd_rec_policy policy) { char *sfd_pl; u8 num_rec; @@ -1301,8 +1301,7 @@ static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, return -ENOMEM; mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); - mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), - mac, fid, action, local_port); + mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, mac, fid, action, local_port); num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); if (err) @@ -1321,7 +1320,8 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, bool dynamic) { return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding, - MLXSW_REG_SFD_REC_ACTION_NOP, dynamic); + MLXSW_REG_SFD_REC_ACTION_NOP, + mlxsw_sp_sfd_rec_policy(dynamic)); } int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, @@ -1329,7 +1329,7 @@ int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, { return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, 0, mac, fid, adding, MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER, - false); + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY); } static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c @@ -337,7 +337,7 @@ static void macvlan_process_broadcast(struct work_struct *w) if (src) dev_put(src->dev); - kfree_skb(skb); + consume_skb(skb); } } diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c @@ -197,6 +197,7 @@ static struct phy_driver bcm87xx_driver[] = { .phy_id = PHY_ID_BCM8706, .phy_id_mask = 0xffffffff, .name = "Broadcom BCM8706", + .features = PHY_10GBIT_FEC_FEATURES, .config_init = bcm87xx_config_init, .config_aneg = bcm87xx_config_aneg, .read_status = bcm87xx_read_status, @@ -208,6 +209,7 @@ static struct phy_driver bcm87xx_driver[] = { .phy_id = PHY_ID_BCM8727, .phy_id_mask = 0xffffffff, .name = "Broadcom BCM8727", + .features = PHY_10GBIT_FEC_FEATURES, .config_init = bcm87xx_config_init, .config_aneg = bcm87xx_config_aneg, .read_status = bcm87xx_read_status, diff --git a/drivers/net/phy/cortina.c b/drivers/net/phy/cortina.c @@ -88,6 +88,7 @@ static struct phy_driver cortina_driver[] = { .phy_id = PHY_ID_CS4340, .phy_id_mask = 0xffffffff, .name = "Cortina CS4340", + .features = PHY_10GBIT_FEATURES, .config_init = gen10g_config_init, .config_aneg = gen10g_config_aneg, .read_status = cortina_read_status, diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c @@ -1046,6 +1046,39 @@ static int m88e1145_config_init(struct phy_device *phydev) return 0; } +/* The VOD can be out of specification on link up. Poke an + * undocumented register, in an undocumented page, with a magic value + * to fix this. + */ +static int m88e6390_errata(struct phy_device *phydev) +{ + int err; + + err = phy_write(phydev, MII_BMCR, + BMCR_ANENABLE | BMCR_SPEED1000 | BMCR_FULLDPLX); + if (err) + return err; + + usleep_range(300, 400); + + err = phy_write_paged(phydev, 0xf8, 0x08, 0x36); + if (err) + return err; + + return genphy_soft_reset(phydev); +} + +static int m88e6390_config_aneg(struct phy_device *phydev) +{ + int err; + + err = m88e6390_errata(phydev); + if (err) + return err; + + return m88e1510_config_aneg(phydev); +} + /** * fiber_lpa_mod_linkmode_lpa_t * @advertising: the linkmode advertisement settings @@ -1402,7 +1435,7 @@ static int m88e1318_set_wol(struct phy_device *phydev, * before enabling it if !phy_interrupt_is_valid() */ if (!phy_interrupt_is_valid(phydev)) - phy_read(phydev, MII_M1011_IEVENT); + __phy_read(phydev, MII_M1011_IEVENT); /* Enable the WOL interrupt */ err = __phy_modify(phydev, MII_88E1318S_PHY_CSIER, 0, @@ -2283,7 +2316,7 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES, .probe = m88e6390_probe, .config_init = &marvell_config_init, - .config_aneg = &m88e1510_config_aneg, + .config_aneg = &m88e6390_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c @@ -390,6 +390,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) if (IS_ERR(gpiod)) { dev_err(&bus->dev, "mii_bus %s couldn't get reset GPIO\n", bus->id); + device_del(&bus->dev); return PTR_ERR(gpiod); } else if (gpiod) { bus->reset_gpiod = gpiod; diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c @@ -233,6 +233,7 @@ static struct phy_driver meson_gxl_phy[] = { .name = "Meson GXL Internal PHY", .features = PHY_BASIC_FEATURES, .flags = PHY_IS_INTERNAL, + .soft_reset = genphy_soft_reset, .config_init = meson_gxl_config_init, .aneg_done = genphy_aneg_done, .read_status = meson_gxl_read_status, diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c @@ -1070,6 +1070,7 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz9021_type, .probe = kszphy_probe, .config_init = ksz9031_config_init, + .soft_reset = genphy_soft_reset, .read_status = ksz9031_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, @@ -1098,6 +1099,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id = PHY_ID_KSZ8873MLL, .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8873MLL Switch", + .features = PHY_BASIC_FEATURES, .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, .read_status = ksz8873mll_read_status, diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c @@ -543,13 +543,6 @@ int phy_start_aneg(struct phy_device *phydev) mutex_lock(&phydev->lock); - if (!__phy_is_started(phydev)) { - WARN(1, "called from state %s\n", - phy_state_to_str(phydev->state)); - err = -EBUSY; - goto out_unlock; - } - if (AUTONEG_DISABLE == phydev->autoneg) phy_sanitize_settings(phydev); @@ -560,11 +553,13 @@ int phy_start_aneg(struct phy_device *phydev) if (err < 0) goto out_unlock; - if (phydev->autoneg == AUTONEG_ENABLE) { - err = phy_check_link_status(phydev); - } else { - phydev->state = PHY_FORCING; - phydev->link_timeout = PHY_FORCE_TIMEOUT; + if (__phy_is_started(phydev)) { + if (phydev->autoneg == AUTONEG_ENABLE) { + err = phy_check_link_status(phydev); + } else { + phydev->state = PHY_FORCING; + phydev->link_timeout = PHY_FORCE_TIMEOUT; + } } out_unlock: diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c @@ -61,6 +61,9 @@ EXPORT_SYMBOL_GPL(phy_gbit_all_ports_features); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_10gbit_features); +__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; +EXPORT_SYMBOL_GPL(phy_10gbit_fec_features); + static const int phy_basic_ports_array[] = { ETHTOOL_LINK_MODE_Autoneg_BIT, ETHTOOL_LINK_MODE_TP_BIT, @@ -109,6 +112,11 @@ const int phy_10gbit_features_array[1] = { }; EXPORT_SYMBOL_GPL(phy_10gbit_features_array); +const int phy_10gbit_fec_features_array[1] = { + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, +}; +EXPORT_SYMBOL_GPL(phy_10gbit_fec_features_array); + __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_10gbit_full_features); @@ -191,6 +199,10 @@ static void features_init(void) linkmode_set_bit_array(phy_10gbit_full_features_array, ARRAY_SIZE(phy_10gbit_full_features_array), phy_10gbit_full_features); + /* 10G FEC only */ + linkmode_set_bit_array(phy_10gbit_fec_features_array, + ARRAY_SIZE(phy_10gbit_fec_features_array), + phy_10gbit_fec_features); } void phy_device_free(struct phy_device *phydev) @@ -2243,6 +2255,11 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner) { int retval; + if (WARN_ON(!new_driver->features)) { + pr_err("%s: Driver features are missing\n", new_driver->name); + return -EINVAL; + } + new_driver->mdiodrv.flags |= MDIO_DEVICE_IS_PHY; new_driver->mdiodrv.driver.name = new_driver->name; new_driver->mdiodrv.driver.bus = &mdio_bus_type; diff --git a/drivers/net/phy/teranetics.c b/drivers/net/phy/teranetics.c @@ -80,6 +80,7 @@ static struct phy_driver teranetics_driver[] = { .phy_id = PHY_ID_TN2020, .phy_id_mask = 0xffffffff, .name = "Teranetics TN2020", + .features = PHY_10GBIT_FEATURES, .soft_reset = gen10g_no_soft_reset, .aneg_done = teranetics_aneg_done, .config_init = gen10g_config_init, diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c @@ -445,6 +445,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, if (pskb_trim_rcsum(skb, len)) goto drop; + ph = pppoe_hdr(skb); pn = pppoe_pernet(dev_net(dev)); /* Note that get_item does a sock_hold(), so sk_pppox(po) diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c @@ -1287,6 +1287,20 @@ static const struct driver_info asix112_info = { #undef ASIX112_DESC +static const struct driver_info trendnet_info = { + .description = "USB-C 3.1 to 5GBASE-T Ethernet Adapter", + .bind = aqc111_bind, + .unbind = aqc111_unbind, + .status = aqc111_status, + .link_reset = aqc111_link_reset, + .reset = aqc111_reset, + .stop = aqc111_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | + FLAG_AVOID_UNLINK_URBS | FLAG_MULTI_PACKET, + .rx_fixup = aqc111_rx_fixup, + .tx_fixup = aqc111_tx_fixup, +}; + static int aqc111_suspend(struct usb_interface *intf, pm_message_t message) { struct usbnet *dev = usb_get_intfdata(intf); @@ -1440,6 +1454,7 @@ static const struct usb_device_id products[] = { {AQC111_USB_ETH_DEV(0x2eca, 0xc101, aqc111_info)}, {AQC111_USB_ETH_DEV(0x0b95, 0x2790, asix111_info)}, {AQC111_USB_ETH_DEV(0x0b95, 0x2791, asix112_info)}, + {AQC111_USB_ETH_DEV(0x20f4, 0xe05a, trendnet_info)}, { },/* END */ }; MODULE_DEVICE_TABLE(usb, products); diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c @@ -843,6 +843,14 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* USB-C 3.1 to 5GBASE-T Ethernet Adapter (based on AQC111U) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0xe05a, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c @@ -1330,7 +1330,7 @@ static int virtnet_receive(struct receive_queue *rq, int budget, return stats.packets; } -static void free_old_xmit_skbs(struct send_queue *sq) +static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) { struct sk_buff *skb; unsigned int len; @@ -1343,7 +1343,7 @@ static void free_old_xmit_skbs(struct send_queue *sq) bytes += skb->len; packets++; - dev_consume_skb_any(skb); + napi_consume_skb(skb, in_napi); } /* Avoid overhead when no packets have been processed @@ -1369,7 +1369,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) return; if (__netif_tx_trylock(txq)) { - free_old_xmit_skbs(sq); + free_old_xmit_skbs(sq, true); __netif_tx_unlock(txq); } @@ -1445,7 +1445,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq)); __netif_tx_lock(txq, raw_smp_processor_id()); - free_old_xmit_skbs(sq); + free_old_xmit_skbs(sq, true); __netif_tx_unlock(txq); virtqueue_napi_complete(napi, sq->vq, 0); @@ -1514,7 +1514,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) bool use_napi = sq->napi.weight; /* Free up any pending old buffers before queueing new ones. */ - free_old_xmit_skbs(sq); + free_old_xmit_skbs(sq, false); if (use_napi && kick) virtqueue_enable_cb_delayed(sq->vq); @@ -1557,7 +1557,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) if (!use_napi && unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { /* More just got used, free them then recheck. */ - free_old_xmit_skbs(sq); + free_old_xmit_skbs(sq, false); if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { netif_start_subqueue(dev, qnum); virtqueue_disable_cb(sq->vq); diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c @@ -1236,7 +1236,8 @@ static void handle_rx(struct vhost_net *net) if (nvq->done_idx > VHOST_NET_BATCH) vhost_net_signal_used(nvq); if (unlikely(vq_log)) - vhost_log_write(vq, vq_log, log, vhost_len); + vhost_log_write(vq, vq_log, log, vhost_len, + vq->iov, in); total_len += vhost_len; if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) { vhost_poll_queue(&vq->poll); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c @@ -1737,13 +1737,87 @@ static int log_write(void __user *log_base, return r; } +static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) +{ + struct vhost_umem *umem = vq->umem; + struct vhost_umem_node *u; + u64 start, end, l, min; + int r; + bool hit = false; + + while (len) { + min = len; + /* More than one GPAs can be mapped into a single HVA. So + * iterate all possible umems here to be safe. + */ + list_for_each_entry(u, &umem->umem_list, link) { + if (u->userspace_addr > hva - 1 + len || + u->userspace_addr - 1 + u->size < hva) + continue; + start = max(u->userspace_addr, hva); + end = min(u->userspace_addr - 1 + u->size, + hva - 1 + len); + l = end - start + 1; + r = log_write(vq->log_base, + u->start + start - u->userspace_addr, + l); + if (r < 0) + return r; + hit = true; + min = min(l, min); + } + + if (!hit) + return -EFAULT; + + len -= min; + hva += min; + } + + return 0; +} + +static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) +{ + struct iovec iov[64]; + int i, ret; + + if (!vq->iotlb) + return log_write(vq->log_base, vq->log_addr + used_offset, len); + + ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, + len, iov, 64, VHOST_ACCESS_WO); + if (ret) + return ret; + + for (i = 0; i < ret; i++) { + ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base, + iov[i].iov_len); + if (ret) + return ret; + } + + return 0; +} + int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, - unsigned int log_num, u64 len) + unsigned int log_num, u64 len, struct iovec *iov, int count) { int i, r; /* Make sure data written is seen before log. */ smp_wmb(); + + if (vq->iotlb) { + for (i = 0; i < count; i++) { + r = log_write_hva(vq, (uintptr_t)iov[i].iov_base, + iov[i].iov_len); + if (r < 0) + return r; + } + return 0; + } + for (i = 0; i < log_num; ++i) { u64 l = min(log[i].len, len); r = log_write(vq->log_base, log[i].addr, l); @@ -1773,9 +1847,8 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) smp_wmb(); /* Log used flag write. */ used = &vq->used->flags; - log_write(vq->log_base, vq->log_addr + - (used - (void __user *)vq->used), - sizeof vq->used->flags); + log_used(vq, (used - (void __user *)vq->used), + sizeof vq->used->flags); if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } @@ -1793,9 +1866,8 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) smp_wmb(); /* Log avail event write */ used = vhost_avail_event(vq); - log_write(vq->log_base, vq->log_addr + - (used - (void __user *)vq->used), - sizeof *vhost_avail_event(vq)); + log_used(vq, (used - (void __user *)vq->used), + sizeof *vhost_avail_event(vq)); if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } @@ -2195,10 +2267,8 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, /* Make sure data is seen before log. */ smp_wmb(); /* Log used ring entry write. */ - log_write(vq->log_base, - vq->log_addr + - ((void __user *)used - (void __user *)vq->used), - count * sizeof *used); + log_used(vq, ((void __user *)used - (void __user *)vq->used), + count * sizeof *used); } old = vq->last_used_idx; new = (vq->last_used_idx += count); @@ -2240,9 +2310,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, /* Make sure used idx is seen before log. */ smp_wmb(); /* Log used index update. */ - log_write(vq->log_base, - vq->log_addr + offsetof(struct vring_used, idx), - sizeof vq->used->idx); + log_used(vq, offsetof(struct vring_used, idx), + sizeof vq->used->idx); if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h @@ -205,7 +205,8 @@ bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *); bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, - unsigned int log_num, u64 len); + unsigned int log_num, u64 len, + struct iovec *iov, int count); int vq_iotlb_prefetch(struct vhost_virtqueue *vq); struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); diff --git a/include/linux/phy.h b/include/linux/phy.h @@ -48,6 +48,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; #define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features) @@ -56,6 +57,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_ini #define PHY_GBIT_FIBRE_FEATURES ((unsigned long *)&phy_gbit_fibre_features) #define PHY_GBIT_ALL_PORTS_FEATURES ((unsigned long *)&phy_gbit_all_ports_features) #define PHY_10GBIT_FEATURES ((unsigned long *)&phy_10gbit_features) +#define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features) #define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) extern const int phy_10_100_features_array[4]; @@ -467,8 +469,8 @@ struct phy_device { * only works for PHYs with IDs which match this field * name: The friendly name of this PHY type * phy_id_mask: Defines the important bits of the phy_id - * features: A list of features (speed, duplex, etc) supported - * by this PHY + * features: A mandatory list of features (speed, duplex, etc) + * supported by this PHY * flags: A bitfield defining certain other features this PHY * supports (like interrupts) * diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h @@ -3218,6 +3218,7 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); * * This is exactly the same as pskb_trim except that it ensures the * checksum of received packets are still valid after the operation. + * It can change skb pointers. */ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h @@ -21,18 +21,6 @@ struct socket; struct rxrpc_call; /* - * Call completion condition (state == RXRPC_CALL_COMPLETE). - */ -enum rxrpc_call_completion { - RXRPC_CALL_SUCCEEDED, /* - Normal termination */ - RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ - RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ - RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ - RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ - NR__RXRPC_CALL_COMPLETIONS -}; - -/* * Debug ID counter for tracing. */ extern atomic_t rxrpc_debug_id; @@ -73,10 +61,6 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t, unsigned int); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); -int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, - struct sockaddr_rxrpc *, struct key *); -int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, - enum rxrpc_call_completion *, u32 *); u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h @@ -241,7 +241,7 @@ int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb, struct fib_dump_filter *filter); -int fib_table_flush(struct net *net, struct fib_table *table); +int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h @@ -84,7 +84,6 @@ struct flow_offload { struct nf_flow_route { struct { struct dst_entry *dst; - int ifindex; } tuple[FLOW_OFFLOAD_DIR_MAX]; }; diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h @@ -268,7 +268,7 @@ struct sockaddr_in { #define IN_MULTICAST(a) IN_CLASSD(a) #define IN_MULTICAST_NET 0xe0000000 -#define IN_BADCLASS(a) ((((long int) (a) ) == 0xffffffff) +#define IN_BADCLASS(a) (((long int) (a) ) == (long int)0xffffffff) #define IN_EXPERIMENTAL(a) IN_BADCLASS((a)) #define IN_CLASSE(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c @@ -467,7 +467,7 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) return kind_ops[BTF_INFO_KIND(t->info)]; } -bool btf_name_offset_valid(const struct btf *btf, u32 offset) +static bool btf_name_offset_valid(const struct btf *btf, u32 offset) { return BTF_STR_OFFSET_VALID(offset) && offset < btf->hdr.str_len; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c @@ -718,6 +718,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_trace_printk: if (capable(CAP_SYS_ADMIN)) return bpf_get_trace_printk_proto(); + /* fall through */ default: return NULL; } diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c @@ -12,6 +12,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) { struct bpf_map *inner_map, *inner_map_meta; + u32 inner_map_meta_size; struct fd f; f = fdget(inner_map_ufd); @@ -36,7 +37,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) return ERR_PTR(-EINVAL); } - inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER); + inner_map_meta_size = sizeof(*inner_map_meta); + /* In some cases verifier needs to access beyond just base map. */ + if (inner_map->ops == &array_map_ops) + inner_map_meta_size = sizeof(struct bpf_array); + + inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER); if (!inner_map_meta) { fdput(f); return ERR_PTR(-ENOMEM); @@ -46,9 +52,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->key_size = inner_map->key_size; inner_map_meta->value_size = inner_map->value_size; inner_map_meta->map_flags = inner_map->map_flags; - inner_map_meta->ops = inner_map->ops; inner_map_meta->max_entries = inner_map->max_entries; + /* Misc members not needed in bpf_map_meta_equal() check. */ + inner_map_meta->ops = inner_map->ops; + if (inner_map->ops == &array_map_ops) { + inner_map_meta->unpriv_array = inner_map->unpriv_array; + container_of(inner_map_meta, struct bpf_array, map)->index_mask = + container_of(inner_map, struct bpf_array, map)->index_mask; + } + fdput(f); return inner_map_meta; } diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c @@ -180,11 +180,14 @@ static inline int stack_map_parse_build_id(void *page_addr, if (nhdr->n_type == BPF_BUILD_ID && nhdr->n_namesz == sizeof("GNU") && - nhdr->n_descsz == BPF_BUILD_ID_SIZE) { + nhdr->n_descsz > 0 && + nhdr->n_descsz <= BPF_BUILD_ID_SIZE) { memcpy(build_id, note_start + note_offs + ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), - BPF_BUILD_ID_SIZE); + nhdr->n_descsz); + memset(build_id + nhdr->n_descsz, 0, + BPF_BUILD_ID_SIZE - nhdr->n_descsz); return 0; } new_offs = note_offs + sizeof(Elf32_Nhdr) + @@ -311,6 +314,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, for (i = 0; i < trace_nr; i++) { id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].ip = ips[i]; + memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); } return; } @@ -321,6 +325,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, /* per entry fall back to ips */ id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].ip = ips[i]; + memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); continue; } id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ - .section .bpfilter_umh, "a" + .section .rodata, "a" .global bpfilter_umh_start bpfilter_umh_start: .incbin "net/bpfilter/bpfilter_umh" diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c @@ -1128,6 +1128,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, err = -ENOMEM; goto err_unlock; } + if (swdev_notify) + fdb->added_by_user = 1; fdb->added_by_external_learn = 1; fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } else { @@ -1147,6 +1149,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, modified = true; } + if (swdev_notify) + fdb->added_by_user = 1; + if (modified) fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c @@ -36,10 +36,10 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { + skb_push(skb, ETH_HLEN); if (!is_skb_forwardable(skb->dev, skb)) goto drop; - skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && @@ -98,12 +98,11 @@ static void __br_forward(const struct net_bridge_port *to, net = dev_net(indev); } else { if (unlikely(netpoll_tx_running(to->br->dev))) { - if (!is_skb_forwardable(skb->dev, skb)) { + skb_push(skb, ETH_HLEN); + if (!is_skb_forwardable(skb->dev, skb)) kfree_skb(skb); - } else { - skb_push(skb, ETH_HLEN); + else br_netpoll_send_skb(to, skb); - } return; } br_hook = NF_BR_LOCAL_OUT; diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c @@ -131,6 +131,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) IPSTATS_MIB_INDISCARDS); goto drop; } + hdr = ipv6_hdr(skb); } if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) goto drop; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c @@ -1137,14 +1137,16 @@ static int do_replace(struct net *net, const void __user *user, tmp.name[sizeof(tmp.name) - 1] = 0; countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; - newinfo = vmalloc(sizeof(*newinfo) + countersize); + newinfo = __vmalloc(sizeof(*newinfo) + countersize, GFP_KERNEL_ACCOUNT, + PAGE_KERNEL); if (!newinfo) return -ENOMEM; if (countersize) memset(newinfo->counters, 0, countersize); - newinfo->entries = vmalloc(tmp.entries_size); + newinfo->entries = __vmalloc(tmp.entries_size, GFP_KERNEL_ACCOUNT, + PAGE_KERNEL); if (!newinfo->entries) { ret = -ENOMEM; goto free_newinfo; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c @@ -229,6 +229,7 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) return false; + ip6h = ipv6_hdr(skb); thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; diff --git a/net/core/filter.c b/net/core/filter.c @@ -2020,18 +2020,19 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, u32 flags) { - /* skb->mac_len is not set on normal egress */ - unsigned int mlen = skb->network_header - skb->mac_header; + unsigned int mlen = skb_network_offset(skb); - __skb_pull(skb, mlen); + if (mlen) { + __skb_pull(skb, mlen); - /* At ingress, the mac header has already been pulled once. - * At egress, skb_pospull_rcsum has to be done in case that - * the skb is originated from ingress (i.e. a forwarded skb) - * to ensure that rcsum starts at net header. - */ - if (!skb_at_tc_ingress(skb)) - skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + /* At ingress, the mac header has already been pulled once. + * At egress, skb_pospull_rcsum has to be done in case that + * the skb is originated from ingress (i.e. a forwarded skb) + * to ensure that rcsum starts at net header. + */ + if (!skb_at_tc_ingress(skb)) + skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + } skb_pop_mac_header(skb); skb_reset_mac_len(skb); return flags & BPF_F_INGRESS ? @@ -4119,6 +4120,10 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break; case SO_MAX_PACING_RATE: /* 32bit version */ + if (val != ~0U) + cmpxchg(&sk->sk_pacing_status, + SK_PACING_NONE, + SK_PACING_NEEDED); sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val; sk->sk_pacing_rate = min(sk->sk_pacing_rate, sk->sk_max_pacing_rate); @@ -4132,7 +4137,10 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_rcvlowat = val ? : 1; break; case SO_MARK: - sk->sk_mark = val; + if (sk->sk_mark != val) { + sk->sk_mark = val; + sk_dst_reset(sk); + } break; default: ret = -EINVAL; @@ -5309,7 +5317,7 @@ bpf_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_trace_printk: if (capable(CAP_SYS_ADMIN)) return bpf_get_trace_printk_proto(); - /* else: fall through */ + /* else, fall through */ default: return NULL; } diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c @@ -63,6 +63,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, lwt->name ? : "<unknown>"); ret = BPF_OK; } else { + skb_reset_mac_header(skb); ret = skb_do_redirect(skb); if (ret == 0) ret = BPF_REDIRECT; diff --git a/net/core/neighbour.c b/net/core/neighbour.c @@ -450,7 +450,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); - kmemleak_alloc(buckets, size, 0, GFP_ATOMIC); + kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); } if (!buckets) { kfree(ret); @@ -1007,7 +1007,7 @@ static void neigh_probe(struct neighbour *neigh) if (neigh->ops->solicit) neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); - kfree_skb(skb); + consume_skb(skb); } /* Called when a timer expires for a neighbour entry. */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c @@ -203,7 +203,7 @@ static void fib_flush(struct net *net) struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) - flushed += fib_table_flush(net, tb); + flushed += fib_table_flush(net, tb, false); } if (flushed) @@ -1463,7 +1463,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); - fib_table_flush(net, tb); + fib_table_flush(net, tb, true); fib_free_table(tb); } } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c @@ -1856,7 +1856,7 @@ void fib_table_flush_external(struct fib_table *tb) } /* Caller must hold RTNL. */ -int fib_table_flush(struct net *net, struct fib_table *tb) +int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; @@ -1904,8 +1904,17 @@ int fib_table_flush(struct net *net, struct fib_table *tb) hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; - if (!fi || !(fi->fib_flags & RTNH_F_DEAD) || - tb->tb_id != fa->tb_id) { + if (!fi || tb->tb_id != fa->tb_id || + (!(fi->fib_flags & RTNH_F_DEAD) && + !fib_props[fa->fa_type].error)) { + slen = fa->fa_slen; + continue; + } + + /* Do not flush error routes if network namespace is + * not being dismantled + */ + if (!flush_all && fib_props[fa->fa_type].error) { slen = fa->fa_slen; continue; } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c @@ -1020,10 +1020,11 @@ static int gue_err(struct sk_buff *skb, u32 info) { int transport_offset = skb_transport_offset(skb); struct guehdr *guehdr; - size_t optlen; + size_t len, optlen; int ret; - if (skb->len < sizeof(struct udphdr) + sizeof(struct guehdr)) + len = sizeof(struct udphdr) + sizeof(struct guehdr); + if (!pskb_may_pull(skb, len)) return -EINVAL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; @@ -1058,6 +1059,10 @@ static int gue_err(struct sk_buff *skb, u32 info) optlen = guehdr->hlen << 2; + if (!pskb_may_pull(skb, len + optlen)) + return -EINVAL; + + guehdr = (struct guehdr *)&udp_hdr(skb)[1]; if (validate_gue_flags(guehdr, optlen)) return -EINVAL; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c @@ -569,8 +569,7 @@ err_free_skb: dev->stats.tx_dropped++; } -static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, - __be16 proto) +static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_info *tun_info; @@ -578,10 +577,10 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, struct erspan_metadata *md; struct rtable *rt = NULL; bool truncate = false; + __be16 df, proto; struct flowi4 fl; int tunnel_hlen; int version; - __be16 df; int nhoff; int thoff; @@ -626,18 +625,20 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, if (version == 1) { erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), ntohl(md->u.index), truncate, true); + proto = htons(ETH_P_ERSPAN); } else if (version == 2) { erspan_build_header_v2(skb, ntohl(tunnel_id_to_key32(key->tun_id)), md->u.md2.dir, get_hwid(&md->u.md2), truncate, true); + proto = htons(ETH_P_ERSPAN2); } else { goto err_free_rt; } gre_build_header(skb, 8, TUNNEL_SEQ, - htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++)); + proto, 0, htonl(tunnel->o_seqno++)); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -721,12 +722,13 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, { struct ip_tunnel *tunnel = netdev_priv(dev); bool truncate = false; + __be16 proto; if (!pskb_inet_may_pull(skb)) goto free_skb; if (tunnel->collect_md) { - erspan_fb_xmit(skb, dev, skb->protocol); + erspan_fb_xmit(skb, dev); return NETDEV_TX_OK; } @@ -742,19 +744,22 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, } /* Push ERSPAN header */ - if (tunnel->erspan_ver == 1) + if (tunnel->erspan_ver == 1) { erspan_build_header(skb, ntohl(tunnel->parms.o_key), tunnel->index, truncate, true); - else if (tunnel->erspan_ver == 2) + proto = htons(ETH_P_ERSPAN); + } else if (tunnel->erspan_ver == 2) { erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), tunnel->dir, tunnel->hwid, truncate, true); - else + proto = htons(ETH_P_ERSPAN2); + } else { goto free_skb; + } tunnel->parms.o_flags &= ~TUNNEL_KEY; - __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); + __gre_xmit(skb, dev, &tunnel->parms.iph, proto); return NETDEV_TX_OK; free_skb: diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c @@ -488,6 +488,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) goto drop; } + iph = ip_hdr(skb); skb->transport_header = skb->network_header + iph->ihl*4; /* Remove any debris in the socket control block */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c @@ -1186,7 +1186,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) flags = msg->msg_flags; if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) { - if (sk->sk_state != TCP_ESTABLISHED) { + if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { err = -EINVAL; goto out_err; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c @@ -847,15 +847,23 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) + if (hlen + cork->gso_size > cork->fragsize) { + kfree_skb(skb); return -EINVAL; - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + } + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + kfree_skb(skb); return -EINVAL; - if (sk->sk_no_check_tx) + } + if (sk->sk_no_check_tx) { + kfree_skb(skb); return -EINVAL; + } if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) + dst_xfrm(skb_dst(skb))) { + kfree_skb(skb); return -EIO; + } skb_shinfo(skb)->gso_size = cork->gso_size; skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; @@ -1918,7 +1926,7 @@ void udp_lib_rehash(struct sock *sk, u16 newhash) } EXPORT_SYMBOL(udp_lib_rehash); -static void udp_v4_rehash(struct sock *sk) +void udp_v4_rehash(struct sock *sk) { u16 new_hash = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h @@ -10,6 +10,7 @@ int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int); int __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); int udp_v4_get_port(struct sock *sk, unsigned short snum); +void udp_v4_rehash(struct sock *sk); int udp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c @@ -53,6 +53,7 @@ struct proto udplite_prot = { .sendpage = udp_sendpage, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c @@ -90,10 +90,11 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { int transport_offset = skb_transport_offset(skb); struct guehdr *guehdr; - size_t optlen; + size_t len, optlen; int ret; - if (skb->len < sizeof(struct udphdr) + sizeof(struct guehdr)) + len = sizeof(struct udphdr) + sizeof(struct guehdr); + if (!pskb_may_pull(skb, len)) return -EINVAL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; @@ -128,6 +129,10 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, optlen = guehdr->hlen << 2; + if (!pskb_may_pull(skb, len + optlen)) + return -EINVAL; + + guehdr = (struct guehdr *)&udp_hdr(skb)[1]; if (validate_gue_flags(guehdr, optlen)) return -EINVAL; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c @@ -922,6 +922,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, __u8 dsfield = false; struct flowi6 fl6; int err = -EINVAL; + __be16 proto; __u32 mtu; int nhoff; int thoff; @@ -1035,8 +1036,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, } /* Push GRE header. */ - gre_build_header(skb, 8, TUNNEL_SEQ, - htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++)); + proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN) + : htons(ETH_P_ERSPAN2); + gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++)); /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) @@ -1169,6 +1171,10 @@ static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t, t->parms.i_flags = p->i_flags; t->parms.o_flags = p->o_flags; t->parms.fwmark = p->fwmark; + t->parms.erspan_ver = p->erspan_ver; + t->parms.index = p->index; + t->parms.dir = p->dir; + t->parms.hwid = p->hwid; dst_cache_reset(&t->dst_cache); } @@ -2025,9 +2031,9 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); struct __ip6_tnl_parm p; - struct ip6_tnl *t; t = ip6gre_changelink_common(dev, tb, data, &p, extack); if (IS_ERR(t)) diff --git a/net/ipv6/route.c b/net/ipv6/route.c @@ -4251,17 +4251,6 @@ struct rt6_nh { struct list_head next; }; -static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) -{ - struct rt6_nh *nh; - - list_for_each_entry(nh, rt6_nh_list, next) { - pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n", - &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, - nh->r_cfg.fc_ifindex); - } -} - static int ip6_route_info_append(struct net *net, struct list_head *rt6_nh_list, struct fib6_info *rt, @@ -4407,7 +4396,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nh->fib6_info = NULL; if (err) { if (replace && nhn) - ip6_print_replace_route_err(&rt6_nh_list); + NL_SET_ERR_MSG_MOD(extack, + "multipath route replace failed (check consistency of installed routes)"); err_nh = nh; goto add_errout; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c @@ -102,7 +102,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, hash2_nulladdr); } -static void udp_v6_rehash(struct sock *sk) +void udp_v6_rehash(struct sock *sk) { u16 new_hash = ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, @@ -1132,15 +1132,23 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) + if (hlen + cork->gso_size > cork->fragsize) { + kfree_skb(skb); return -EINVAL; - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + } + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + kfree_skb(skb); return -EINVAL; - if (udp_sk(sk)->no_check6_tx) + } + if (udp_sk(sk)->no_check6_tx) { + kfree_skb(skb); return -EINVAL; + } if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) + dst_xfrm(skb_dst(skb))) { + kfree_skb(skb); return -EIO; + } skb_shinfo(skb)->gso_size = cork->gso_size; skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h @@ -13,6 +13,7 @@ int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, __be32, struct udp_table *); int udp_v6_get_port(struct sock *sk, unsigned short snum); +void udp_v6_rehash(struct sock *sk); int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c @@ -49,6 +49,7 @@ struct proto udplitev6_prot = { .recvmsg = udpv6_recvmsg, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c @@ -28,6 +28,7 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, { struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple; + struct dst_entry *other_dst = route->tuple[!dir].dst; struct dst_entry *dst = route->tuple[dir].dst; ft->dir = dir; @@ -50,8 +51,8 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, ft->src_port = ctt->src.u.tcp.port; ft->dst_port = ctt->dst.u.tcp.port; - ft->iifidx = route->tuple[dir].ifindex; - ft->oifidx = route->tuple[!dir].ifindex; + ft->iifidx = other_dst->dev->ifindex; + ft->oifidx = dst->dev->ifindex; ft->dst_cache = dst; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c @@ -2304,7 +2304,6 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, struct net *net = sock_net(skb->sk); unsigned int s_idx = cb->args[0]; const struct nft_rule *rule; - int rc = 1; list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_is_active(net, rule)) @@ -2321,16 +2320,13 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, NLM_F_MULTI | NLM_F_APPEND, table->family, table, chain, rule) < 0) - goto out_unfinished; + return 1; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: (*idx)++; } - rc = 0; -out_unfinished: - cb->args[0] = *idx; - return rc; + return 0; } static int nf_tables_dump_rules(struct sk_buff *skb, @@ -2354,7 +2350,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) continue; - if (ctx && ctx->chain) { + if (ctx && ctx->table && ctx->chain) { struct rhlist_head *list, *tmp; list = rhltable_lookup(&table->chains_ht, ctx->chain, @@ -2382,6 +2378,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, } done: rcu_read_unlock(); + + cb->args[0] = idx; return skb->len; } @@ -4508,6 +4506,8 @@ err6: err5: kfree(trans); err4: + if (obj) + obj->use--; kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c @@ -12,6 +12,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <linux/netfilter/nf_conntrack_common.h> #include <net/netfilter/nf_flow_table.h> +#include <net/netfilter/nf_conntrack_helper.h> struct nft_flow_offload { struct nft_flowtable *flowtable; @@ -29,10 +30,12 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, memset(&fl, 0, sizeof(fl)); switch (nft_pf(pkt)) { case NFPROTO_IPV4: - fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip; + fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; + fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; break; case NFPROTO_IPV6: - fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6; + fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; + fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex; break; } @@ -41,9 +44,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, return -ENOENT; route->tuple[dir].dst = this_dst; - route->tuple[dir].ifindex = nft_in(pkt)->ifindex; route->tuple[!dir].dst = other_dst; - route->tuple[!dir].ifindex = nft_out(pkt)->ifindex; return 0; } @@ -66,6 +67,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, { struct nft_flow_offload *priv = nft_expr_priv(expr); struct nf_flowtable *flowtable = &priv->flowtable->data; + const struct nf_conn_help *help; enum ip_conntrack_info ctinfo; struct nf_flow_route route; struct flow_offload *flow; @@ -88,7 +90,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, goto out; } - if (test_bit(IPS_HELPER_BIT, &ct->status)) + help = nfct_help(ct); + if (help) goto out; if (ctinfo == IP_CT_NEW || diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c @@ -500,7 +500,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, return -EINVAL; } - if (!nz || !is_all_zero(nla_data(nla), expected_len)) { + if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) { attrs |= 1 << type; a[type] = nla; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c @@ -2887,7 +2887,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } else if (reserve) { skb_reserve(skb, -reserve); - if (len < reserve) + if (len < reserve + sizeof(struct ipv6hdr) && + dev->min_header_len != dev->hard_header_len) skb_reset_network_header(skb); } diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c @@ -419,76 +419,6 @@ u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call) EXPORT_SYMBOL(rxrpc_kernel_get_epoch); /** - * rxrpc_kernel_check_call - Check a call's state - * @sock: The socket the call is on - * @call: The call to check - * @_compl: Where to store the completion state - * @_abort_code: Where to store any abort code - * - * Allow a kernel service to query the state of a call and find out the manner - * of its termination if it has completed. Returns -EINPROGRESS if the call is - * still going, 0 if the call finished successfully, -ECONNABORTED if the call - * was aborted and an appropriate error if the call failed in some other way. - */ -int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call, - enum rxrpc_call_completion *_compl, u32 *_abort_code) -{ - if (call->state != RXRPC_CALL_COMPLETE) - return -EINPROGRESS; - smp_rmb(); - *_compl = call->completion; - *_abort_code = call->abort_code; - return call->error; -} -EXPORT_SYMBOL(rxrpc_kernel_check_call); - -/** - * rxrpc_kernel_retry_call - Allow a kernel service to retry a call - * @sock: The socket the call is on - * @call: The call to retry - * @srx: The address of the peer to contact - * @key: The security context to use (defaults to socket setting) - * - * Allow a kernel service to try resending a client call that failed due to a - * network error to a new address. The Tx queue is maintained intact, thereby - * relieving the need to re-encrypt any request data that has already been - * buffered. - */ -int rxrpc_kernel_retry_call(struct socket *sock, struct rxrpc_call *call, - struct sockaddr_rxrpc *srx, struct key *key) -{ - struct rxrpc_conn_parameters cp; - struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - int ret; - - _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); - - if (!key) - key = rx->key; - if (key && !key->payload.data[0]) - key = NULL; /* a no-security key */ - - memset(&cp, 0, sizeof(cp)); - cp.local = rx->local; - cp.key = key; - cp.security_level = 0; - cp.exclusive = false; - cp.service_id = srx->srx_service; - - mutex_lock(&call->user_mutex); - - ret = rxrpc_prepare_call_for_retry(rx, call); - if (ret == 0) - ret = rxrpc_retry_client_call(rx, call, &cp, srx, GFP_KERNEL); - - mutex_unlock(&call->user_mutex); - rxrpc_put_peer(cp.peer); - _leave(" = %d", ret); - return ret; -} -EXPORT_SYMBOL(rxrpc_kernel_retry_call); - -/** * rxrpc_kernel_new_call_notification - Get notifications of new calls * @sock: The socket to intercept received messages on * @notify_new_call: Function to be called when new calls appear diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h @@ -476,7 +476,6 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ - RXRPC_CALL_TX_LASTQ, /* Last packet has been queued */ RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ RXRPC_CALL_PINGING, /* Ping in process */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ @@ -518,6 +517,18 @@ enum rxrpc_call_state { }; /* + * Call completion condition (state == RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ + RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ + RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ + RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ + NR__RXRPC_CALL_COMPLETIONS +}; + +/* * Call Tx congestion management modes. */ enum rxrpc_congest_mode { @@ -761,15 +772,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct sockaddr_rxrpc *, struct rxrpc_call_params *, gfp_t, unsigned int); -int rxrpc_retry_client_call(struct rxrpc_sock *, - struct rxrpc_call *, - struct rxrpc_conn_parameters *, - struct sockaddr_rxrpc *, - gfp_t); void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); -int rxrpc_prepare_call_for_retry(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); bool __rxrpc_queue_call(struct rxrpc_call *); bool rxrpc_queue_call(struct rxrpc_call *); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c @@ -325,48 +325,6 @@ error: } /* - * Retry a call to a new address. It is expected that the Tx queue of the call - * will contain data previously packaged for an old call. - */ -int rxrpc_retry_client_call(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) -{ - const void *here = __builtin_return_address(0); - int ret; - - /* Set up or get a connection record and set the protocol parameters, - * including channel number and call ID. - */ - ret = rxrpc_connect_call(rx, call, cp, srx, gfp); - if (ret < 0) - goto error; - - trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), - here, NULL); - - rxrpc_start_call_timer(call); - - _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); - - if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_queue_call(call); - - _leave(" = 0"); - return 0; - -error: - rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); - trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), - here, ERR_PTR(ret)); - _leave(" = %d", ret); - return ret; -} - -/* * Set up an incoming call. call->conn points to the connection. * This is called in BH context and isn't allowed to fail. */ @@ -534,61 +492,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) } /* - * Prepare a kernel service call for retry. - */ -int rxrpc_prepare_call_for_retry(struct rxrpc_sock *rx, struct rxrpc_call *call) -{ - const void *here = __builtin_return_address(0); - int i; - u8 last = 0; - - _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); - - trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage), - here, (const void *)call->flags); - - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - ASSERTCMP(call->completion, !=, RXRPC_CALL_REMOTELY_ABORTED); - ASSERTCMP(call->completion, !=, RXRPC_CALL_LOCALLY_ABORTED); - ASSERT(list_empty(&call->recvmsg_link)); - - del_timer_sync(&call->timer); - - _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, call->conn); - - if (call->conn) - rxrpc_disconnect_call(call); - - if (rxrpc_is_service_call(call) || - !call->tx_phase || - call->tx_hard_ack != 0 || - call->rx_hard_ack != 0 || - call->rx_top != 0) - return -EINVAL; - - call->state = RXRPC_CALL_UNINITIALISED; - call->completion = RXRPC_CALL_SUCCEEDED; - call->call_id = 0; - call->cid = 0; - call->cong_cwnd = 0; - call->cong_extra = 0; - call->cong_ssthresh = 0; - call->cong_mode = 0; - call->cong_dup_acks = 0; - call->cong_cumul_acks = 0; - call->acks_lowest_nak = 0; - - for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { - last |= call->rxtx_annotations[i]; - call->rxtx_annotations[i] &= RXRPC_TX_ANNO_LAST; - call->rxtx_annotations[i] |= RXRPC_TX_ANNO_RETRANS; - } - - _leave(" = 0"); - return 0; -} - -/* * release all the calls associated with a socket */ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c @@ -562,10 +562,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); write_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags)) - call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; - else - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; write_unlock_bh(&call->state_lock); rxrpc_see_call(call); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c @@ -169,10 +169,8 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, ASSERTCMP(seq, ==, call->tx_top + 1); - if (last) { + if (last) annotation |= RXRPC_TX_ANNO_LAST; - set_bit(RXRPC_CALL_TX_LASTQ, &call->flags); - } /* We have to set the timestamp before queueing as the retransmit * algorithm can see the packet as soon as we queue it. @@ -386,6 +384,11 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, call->tx_total_len -= copy; } + /* check for the far side aborting the call or a network error + * occurring */ + if (call->state == RXRPC_CALL_COMPLETE) + goto call_terminated; + /* add the packet to the send queue if it's now full */ if (sp->remain <= 0 || (msg_data_left(msg) == 0 && !more)) { @@ -425,16 +428,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, notify_end_tx); skb = NULL; } - - /* Check for the far side aborting the call or a network error - * occurring. If this happens, save any packet that was under - * construction so that in the case of a network error, the - * call can be retried or redirected. - */ - if (call->state == RXRPC_CALL_COMPLETE) { - ret = call->error; - goto out; - } } while (msg_data_left(msg) > 0); success: @@ -444,6 +437,11 @@ out: _leave(" = %d", ret); return ret; +call_terminated: + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + _leave(" = %d", call->error); + return call->error; + maybe_error: if (copied) goto success; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c @@ -197,6 +197,15 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_TTL] = { .type = NLA_U8 }, }; +static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) +{ + if (!p) + return; + if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(&p->tcft_enc_metadata->dst); + kfree_rcu(p, rcu); +} + static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, @@ -360,8 +369,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, rcu_swap_protected(t->params, params_new, lockdep_is_held(&t->tcf_lock)); spin_unlock_bh(&t->tcf_lock); - if (params_new) - kfree_rcu(params_new, rcu); + tunnel_key_release_params(params_new); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -385,12 +393,7 @@ static void tunnel_key_release(struct tc_action *a) struct tcf_tunnel_key_params *params; params = rcu_dereference_protected(t->params, 1); - if (params) { - if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) - dst_release(&params->tcft_enc_metadata->dst); - - kfree_rcu(params, rcu); - } + tunnel_key_release_params(params); } static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c @@ -1277,7 +1277,6 @@ EXPORT_SYMBOL(tcf_block_cb_unregister); int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { - __be16 protocol = tc_skb_protocol(skb); #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 4; const struct tcf_proto *orig_tp = tp; @@ -1287,6 +1286,7 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { + __be16 protocol = tc_skb_protocol(skb); int err; if (tp->protocol != protocol && @@ -1319,7 +1319,6 @@ reset: } tp = first_tp; - protocol = tc_skb_protocol(skb); goto reclassify; #endif } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c @@ -1290,17 +1290,23 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; + struct fl_flow_mask *mask; struct nlattr **tb; - struct fl_flow_mask mask = {}; int err; if (!tca[TCA_OPTIONS]) return -EINVAL; - tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); - if (!tb) + mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL); + if (!mask) return -ENOBUFS; + tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); + if (!tb) { + err = -ENOBUFS; + goto errout_mask_alloc; + } + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy, NULL); if (err < 0) @@ -1343,12 +1349,12 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } } - err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr, + err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, tp->chain->tmplt_priv, extack); if (err) goto errout_idr; - err = fl_check_assign_mask(head, fnew, fold, &mask); + err = fl_check_assign_mask(head, fnew, fold, mask); if (err) goto errout_idr; @@ -1392,6 +1398,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } kfree(tb); + kfree(mask); return 0; errout_mask: @@ -1405,6 +1412,8 @@ errout: kfree(fnew); errout_tb: kfree(tb); +errout_mask_alloc: + kfree(mask); return err; } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c @@ -1667,7 +1667,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) { struct sk_buff *segs, *nskb; netdev_features_t features = netif_skb_features(skb); - unsigned int slen = 0; + unsigned int slen = 0, numsegs = 0; segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) @@ -1683,6 +1683,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, flow_queue_add(flow, segs); sch->q.qlen++; + numsegs++; slen += segs->len; q->buffer_used += segs->truesize; b->packets++; @@ -1696,7 +1697,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, sch->qstats.backlog += slen; q->avg_window_bytes += slen; - qdisc_tree_reduce_backlog(sch, 1, len); + qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); } else { /* not splitting */ diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c @@ -88,13 +88,14 @@ static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct Qdisc *child, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); int err; err = child->ops->enqueue(skb, child, to_free); if (err != NET_XMIT_SUCCESS) return err; - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c @@ -350,9 +350,11 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; int err = 0; + bool first; cl = drr_classify(skb, sch, &err); if (cl == NULL) { @@ -362,6 +364,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } + first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -371,12 +374,12 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (cl->qdisc->q.qlen == 1) { + if (first) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return err; } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c @@ -199,6 +199,7 @@ static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); struct dsmark_qdisc_data *p = qdisc_priv(sch); int err; @@ -271,7 +272,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c @@ -1539,8 +1539,10 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); struct hfsc_class *cl; int uninitialized_var(err); + bool first; cl = hfsc_classify(skb, sch, &err); if (cl == NULL) { @@ -1550,6 +1552,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } + first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -1559,9 +1562,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } - if (cl->qdisc->q.qlen == 1) { - unsigned int len = qdisc_pkt_len(skb); - + if (first) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); if (cl->cl_flags & HFSC_FSC) @@ -1576,7 +1577,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c @@ -581,6 +581,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { int uninitialized_var(ret); + unsigned int len = qdisc_pkt_len(skb); struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_classify(skb, sch, &ret); @@ -610,7 +611,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, htb_activate(q, cl); } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c @@ -72,6 +72,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) static int prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); struct Qdisc *qdisc; int ret; @@ -88,7 +89,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) ret = qdisc_enqueue(skb, qdisc, to_free); if (ret == NET_XMIT_SUCCESS) { - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c @@ -1210,10 +1210,12 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q) static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb), gso_segs; struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; struct qfq_aggregate *agg; int err = 0; + bool first; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { @@ -1224,17 +1226,18 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); - if (unlikely(cl->agg->lmax < qdisc_pkt_len(skb))) { + if (unlikely(cl->agg->lmax < len)) { pr_debug("qfq: increasing maxpkt from %u to %u for class %u", - cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid); - err = qfq_change_agg(sch, cl, cl->agg->class_weight, - qdisc_pkt_len(skb)); + cl->agg->lmax, len, cl->common.classid); + err = qfq_change_agg(sch, cl, cl->agg->class_weight, len); if (err) { cl->qstats.drops++; return qdisc_drop(skb, sch, to_free); } } + gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; + first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); @@ -1245,16 +1248,17 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - bstats_update(&cl->bstats, skb); - qdisc_qstats_backlog_inc(sch, skb); + cl->bstats.bytes += len; + cl->bstats.packets += gso_segs; + sch->qstats.backlog += len; ++sch->q.qlen; agg = cl->agg; /* if the queue was not empty, then done here */ - if (cl->qdisc->q.qlen != 1) { + if (!first) { if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) && list_first_entry(&agg->active, struct qfq_class, alist) - == cl && cl->deficit < qdisc_pkt_len(skb)) + == cl && cl->deficit < len) list_move_tail(&cl->alist, &agg->active); return err; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c @@ -185,6 +185,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); + unsigned int len = qdisc_pkt_len(skb); int ret; if (qdisc_pkt_len(skb) > q->max_size) { @@ -200,7 +201,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, return ret; } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c @@ -97,11 +97,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, switch (ev) { case NETDEV_UP: - addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; - addr->a.v6.sin6_port = 0; - addr->a.v6.sin6_flowinfo = 0; addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; @@ -434,7 +432,6 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; - addr->a.v6.sin6_port = 0; addr->a.v6.sin6_addr = ifp->addr; addr->a.v6.sin6_scope_id = dev->ifindex; addr->valid = 1; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c @@ -101,7 +101,6 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; - addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; INIT_LIST_HEAD(&addr->list); @@ -776,10 +775,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, switch (ev) { case NETDEV_UP: - addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; - addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; spin_lock_bh(&net->sctp.local_addr_lock); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c @@ -87,6 +87,11 @@ static int tipc_skb_tailroom(struct sk_buff *skb) return limit; } +static inline int TLV_GET_DATA_LEN(struct tlv_desc *tlv) +{ + return TLV_GET_LEN(tlv) - TLV_SPACE(0); +} + static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) { struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); @@ -166,6 +171,11 @@ static struct sk_buff *tipc_get_err_tlv(char *str) return buf; } +static inline bool string_is_valid(char *s, int len) +{ + return memchr(s, '\0', len) ? true : false; +} + static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, struct tipc_nl_compat_msg *msg, struct sk_buff *arg) @@ -379,6 +389,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, struct nlattr *prop; struct nlattr *bearer; struct tipc_bearer_config *b; + int len; b = (struct tipc_bearer_config *)TLV_DATA(msg->req); @@ -386,6 +397,10 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + if (!string_is_valid(b->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) return -EMSGSIZE; @@ -411,6 +426,7 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, { char *name; struct nlattr *bearer; + int len; name = (char *)TLV_DATA(msg->req); @@ -418,6 +434,10 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) return -EMSGSIZE; @@ -478,6 +498,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; int err; + int len; if (!attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -504,6 +525,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, return err; name = (char *)TLV_DATA(msg->req); + + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) return 0; @@ -644,6 +670,7 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, struct nlattr *prop; struct nlattr *media; struct tipc_link_config *lc; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); @@ -651,6 +678,10 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, if (!media) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name)) return -EMSGSIZE; @@ -671,6 +702,7 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, struct nlattr *prop; struct nlattr *bearer; struct tipc_link_config *lc; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); @@ -678,6 +710,10 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name)) return -EMSGSIZE; @@ -726,9 +762,14 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, struct tipc_link_config *lc; struct tipc_bearer *bearer; struct tipc_media *media; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + media = tipc_media_find(lc->name); if (media) { cmd->doit = &__tipc_nl_media_set; @@ -750,6 +791,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, { char *name; struct nlattr *link; + int len; name = (char *)TLV_DATA(msg->req); @@ -757,6 +799,10 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, if (!link) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name)) return -EMSGSIZE; @@ -778,6 +824,8 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) }; ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query)) + return -EINVAL; depth = ntohl(ntq->depth); @@ -1208,7 +1256,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) } len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); - if (len && !TLV_OK(msg.req, len)) { + if (!len || !TLV_OK(msg.req, len)) { msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); err = -EOPNOTSUPP; goto send; diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c @@ -398,7 +398,7 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con) ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); if (ret == -EWOULDBLOCK) return -EWOULDBLOCK; - if (ret > 0) { + if (ret == sizeof(s)) { read_lock_bh(&sk->sk_callback_lock); ret = tipc_conn_rcv_sub(srv, con, &s); read_unlock_bh(&sk->sk_callback_lock); diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c @@ -41,13 +41,20 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) * not know if the device has more tx queues than rx, or the opposite. * This might also change during run time. */ -static void xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem, - u16 queue_id) +static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem, + u16 queue_id) { + if (queue_id >= max_t(unsigned int, + dev->real_num_rx_queues, + dev->real_num_tx_queues)) + return -EINVAL; + if (queue_id < dev->real_num_rx_queues) dev->_rx[queue_id].umem = umem; if (queue_id < dev->real_num_tx_queues) dev->_tx[queue_id].umem = umem; + + return 0; } struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, @@ -88,7 +95,10 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, goto out_rtnl_unlock; } - xdp_reg_umem_at_qid(dev, umem, queue_id); + err = xdp_reg_umem_at_qid(dev, umem, queue_id); + if (err) + goto out_rtnl_unlock; + umem->dev = dev; umem->queue_id = queue_id; if (force_copy) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile @@ -279,6 +279,7 @@ $(obj)/%.o: $(src)/%.c -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ + -I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Facebook */ +#ifndef __ASM_GOTO_WORKAROUND_H +#define __ASM_GOTO_WORKAROUND_H + +/* this will bring in asm_volatile_goto macro definition + * if enabled by compiler and config options. + */ +#include <linux/types.h> + +#ifdef asm_volatile_goto +#undef asm_volatile_goto +#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") +#endif + +#endif diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile @@ -93,9 +93,16 @@ BFD_SRCS = jit_disasm.c SRCS = $(filter-out $(BFD_SRCS),$(wildcard *.c)) ifeq ($(feature-libbfd),1) + LIBS += -lbfd -ldl -lopcodes +else ifeq ($(feature-libbfd-liberty),1) + LIBS += -lbfd -ldl -lopcodes -liberty +else ifeq ($(feature-libbfd-liberty-z),1) + LIBS += -lbfd -ldl -lopcodes -liberty -lz +endif + +ifneq ($(filter -lbfd,$(LIBS)),) CFLAGS += -DHAVE_LIBBFD_SUPPORT SRCS += $(BFD_SRCS) -LIBS += -lbfd -lopcodes endif OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c @@ -1,15 +1,10 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) /* * Simple streaming JSON writer * * This takes care of the annoying bits of JSON syntax like the commas * after elements * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Stephen Hemminger <stephen@networkplumber.org> */ diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h @@ -5,11 +5,6 @@ * This takes care of the annoying bits of JSON syntax like the commas * after elements * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Stephen Hemminger <stephen@networkplumber.org> */ diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h @@ -0,0 +1,1163 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_PKT_SCHED_H +#define __LINUX_PKT_SCHED_H + +#include <linux/types.h> + +/* Logical priority bands not depending on specific packet scheduler. + Every scheduler will map them to real traffic classes, if it has + no more precise mechanism to classify packets. + + These numbers have no special meaning, though their coincidence + with obsolete IPv6 values is not occasional :-). New IPv6 drafts + preferred full anarchy inspired by diffserv group. + + Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy + class, actually, as rule it will be handled with more care than + filler or even bulk. + */ + +#define TC_PRIO_BESTEFFORT 0 +#define TC_PRIO_FILLER 1 +#define TC_PRIO_BULK 2 +#define TC_PRIO_INTERACTIVE_BULK 4 +#define TC_PRIO_INTERACTIVE 6 +#define TC_PRIO_CONTROL 7 + +#define TC_PRIO_MAX 15 + +/* Generic queue statistics, available for all the elements. + Particular schedulers may have also their private records. + */ + +struct tc_stats { + __u64 bytes; /* Number of enqueued bytes */ + __u32 packets; /* Number of enqueued packets */ + __u32 drops; /* Packets dropped because of lack of resources */ + __u32 overlimits; /* Number of throttle events when this + * flow goes out of allocated bandwidth */ + __u32 bps; /* Current flow byte rate */ + __u32 pps; /* Current flow packet rate */ + __u32 qlen; + __u32 backlog; +}; + +struct tc_estimator { + signed char interval; + unsigned char ewma_log; +}; + +/* "Handles" + --------- + + All the traffic control objects have 32bit identifiers, or "handles". + + They can be considered as opaque numbers from user API viewpoint, + but actually they always consist of two fields: major and + minor numbers, which are interpreted by kernel specially, + that may be used by applications, though not recommended. + + F.e. qdisc handles always have minor number equal to zero, + classes (or flows) have major equal to parent qdisc major, and + minor uniquely identifying class inside qdisc. + + Macros to manipulate handles: + */ + +#define TC_H_MAJ_MASK (0xFFFF0000U) +#define TC_H_MIN_MASK (0x0000FFFFU) +#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK) +#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK) +#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK)) + +#define TC_H_UNSPEC (0U) +#define TC_H_ROOT (0xFFFFFFFFU) +#define TC_H_INGRESS (0xFFFFFFF1U) +#define TC_H_CLSACT TC_H_INGRESS + +#define TC_H_MIN_PRIORITY 0xFFE0U +#define TC_H_MIN_INGRESS 0xFFF2U +#define TC_H_MIN_EGRESS 0xFFF3U + +/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */ +enum tc_link_layer { + TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */ + TC_LINKLAYER_ETHERNET, + TC_LINKLAYER_ATM, +}; +#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */ + +struct tc_ratespec { + unsigned char cell_log; + __u8 linklayer; /* lower 4 bits */ + unsigned short overhead; + short cell_align; + unsigned short mpu; + __u32 rate; +}; + +#define TC_RTAB_SIZE 1024 + +struct tc_sizespec { + unsigned char cell_log; + unsigned char size_log; + short cell_align; + int overhead; + unsigned int linklayer; + unsigned int mpu; + unsigned int mtu; + unsigned int tsize; +}; + +enum { + TCA_STAB_UNSPEC, + TCA_STAB_BASE, + TCA_STAB_DATA, + __TCA_STAB_MAX +}; + +#define TCA_STAB_MAX (__TCA_STAB_MAX - 1) + +/* FIFO section */ + +struct tc_fifo_qopt { + __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ +}; + +/* SKBPRIO section */ + +/* + * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1). + * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able + * to map one to one the DS field of IPV4 and IPV6 headers. + * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY. + */ + +#define SKBPRIO_MAX_PRIORITY 64 + +struct tc_skbprio_qopt { + __u32 limit; /* Queue length in packets. */ +}; + +/* PRIO section */ + +#define TCQ_PRIO_BANDS 16 +#define TCQ_MIN_PRIO_BANDS 2 + +struct tc_prio_qopt { + int bands; /* Number of bands */ + __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ +}; + +/* MULTIQ section */ + +struct tc_multiq_qopt { + __u16 bands; /* Number of bands */ + __u16 max_bands; /* Maximum number of queues */ +}; + +/* PLUG section */ + +#define TCQ_PLUG_BUFFER 0 +#define TCQ_PLUG_RELEASE_ONE 1 +#define TCQ_PLUG_RELEASE_INDEFINITE 2 +#define TCQ_PLUG_LIMIT 3 + +struct tc_plug_qopt { + /* TCQ_PLUG_BUFFER: Inset a plug into the queue and + * buffer any incoming packets + * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head + * to beginning of the next plug. + * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue. + * Stop buffering packets until the next TCQ_PLUG_BUFFER + * command is received (just act as a pass-thru queue). + * TCQ_PLUG_LIMIT: Increase/decrease queue size + */ + int action; + __u32 limit; +}; + +/* TBF section */ + +struct tc_tbf_qopt { + struct tc_ratespec rate; + struct tc_ratespec peakrate; + __u32 limit; + __u32 buffer; + __u32 mtu; +}; + +enum { + TCA_TBF_UNSPEC, + TCA_TBF_PARMS, + TCA_TBF_RTAB, + TCA_TBF_PTAB, + TCA_TBF_RATE64, + TCA_TBF_PRATE64, + TCA_TBF_BURST, + TCA_TBF_PBURST, + TCA_TBF_PAD, + __TCA_TBF_MAX, +}; + +#define TCA_TBF_MAX (__TCA_TBF_MAX - 1) + + +/* TEQL section */ + +/* TEQL does not require any parameters */ + +/* SFQ section */ + +struct tc_sfq_qopt { + unsigned quantum; /* Bytes per round allocated to flow */ + int perturb_period; /* Period of hash perturbation */ + __u32 limit; /* Maximal packets in queue */ + unsigned divisor; /* Hash divisor */ + unsigned flows; /* Maximal number of flows */ +}; + +struct tc_sfqred_stats { + __u32 prob_drop; /* Early drops, below max threshold */ + __u32 forced_drop; /* Early drops, after max threshold */ + __u32 prob_mark; /* Marked packets, below max threshold */ + __u32 forced_mark; /* Marked packets, after max threshold */ + __u32 prob_mark_head; /* Marked packets, below max threshold */ + __u32 forced_mark_head;/* Marked packets, after max threshold */ +}; + +struct tc_sfq_qopt_v1 { + struct tc_sfq_qopt v0; + unsigned int depth; /* max number of packets per flow */ + unsigned int headdrop; +/* SFQRED parameters */ + __u32 limit; /* HARD maximal flow queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; + __u32 max_P; /* probability, high resolution */ +/* SFQRED stats */ + struct tc_sfqred_stats stats; +}; + + +struct tc_sfq_xstats { + __s32 allot; +}; + +/* RED section */ + +enum { + TCA_RED_UNSPEC, + TCA_RED_PARMS, + TCA_RED_STAB, + TCA_RED_MAX_P, + __TCA_RED_MAX, +}; + +#define TCA_RED_MAX (__TCA_RED_MAX - 1) + +struct tc_red_qopt { + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; +#define TC_RED_ECN 1 +#define TC_RED_HARDDROP 2 +#define TC_RED_ADAPTATIVE 4 +}; + +struct tc_red_xstats { + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ +}; + +/* GRED section */ + +#define MAX_DPs 16 + +enum { + TCA_GRED_UNSPEC, + TCA_GRED_PARMS, + TCA_GRED_STAB, + TCA_GRED_DPS, + TCA_GRED_MAX_P, + TCA_GRED_LIMIT, + TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */ + __TCA_GRED_MAX, +}; + +#define TCA_GRED_MAX (__TCA_GRED_MAX - 1) + +enum { + TCA_GRED_VQ_ENTRY_UNSPEC, + TCA_GRED_VQ_ENTRY, /* nested TCA_GRED_VQ_* */ + __TCA_GRED_VQ_ENTRY_MAX, +}; +#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1) + +enum { + TCA_GRED_VQ_UNSPEC, + TCA_GRED_VQ_PAD, + TCA_GRED_VQ_DP, /* u32 */ + TCA_GRED_VQ_STAT_BYTES, /* u64 */ + TCA_GRED_VQ_STAT_PACKETS, /* u32 */ + TCA_GRED_VQ_STAT_BACKLOG, /* u32 */ + TCA_GRED_VQ_STAT_PROB_DROP, /* u32 */ + TCA_GRED_VQ_STAT_PROB_MARK, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_DROP, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */ + TCA_GRED_VQ_STAT_PDROP, /* u32 */ + TCA_GRED_VQ_STAT_OTHER, /* u32 */ + TCA_GRED_VQ_FLAGS, /* u32 */ + __TCA_GRED_VQ_MAX +}; + +#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1) + +struct tc_gred_qopt { + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + __u32 DP; /* up to 2^32 DPs */ + __u32 backlog; + __u32 qave; + __u32 forced; + __u32 early; + __u32 other; + __u32 pdrop; + __u8 Wlog; /* log(W) */ + __u8 Plog; /* log(P_max/(qth_max-qth_min)) */ + __u8 Scell_log; /* cell size for idle damping */ + __u8 prio; /* prio of this VQ */ + __u32 packets; + __u32 bytesin; +}; + +/* gred setup */ +struct tc_gred_sopt { + __u32 DPs; + __u32 def_DP; + __u8 grio; + __u8 flags; + __u16 pad1; +}; + +/* CHOKe section */ + +enum { + TCA_CHOKE_UNSPEC, + TCA_CHOKE_PARMS, + TCA_CHOKE_STAB, + TCA_CHOKE_MAX_P, + __TCA_CHOKE_MAX, +}; + +#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1) + +struct tc_choke_qopt { + __u32 limit; /* Hard queue length (packets) */ + __u32 qth_min; /* Min average threshold (packets) */ + __u32 qth_max; /* Max average threshold (packets) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; /* see RED flags */ +}; + +struct tc_choke_xstats { + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ + __u32 matched; /* Drops due to flow match */ +}; + +/* HTB section */ +#define TC_HTB_NUMPRIO 8 +#define TC_HTB_MAXDEPTH 8 +#define TC_HTB_PROTOVER 3 /* the same as HTB and TC's major */ + +struct tc_htb_opt { + struct tc_ratespec rate; + struct tc_ratespec ceil; + __u32 buffer; + __u32 cbuffer; + __u32 quantum; + __u32 level; /* out only */ + __u32 prio; +}; +struct tc_htb_glob { + __u32 version; /* to match HTB/TC */ + __u32 rate2quantum; /* bps->quantum divisor */ + __u32 defcls; /* default class number */ + __u32 debug; /* debug flags */ + + /* stats */ + __u32 direct_pkts; /* count of non shaped packets */ +}; +enum { + TCA_HTB_UNSPEC, + TCA_HTB_PARMS, + TCA_HTB_INIT, + TCA_HTB_CTAB, + TCA_HTB_RTAB, + TCA_HTB_DIRECT_QLEN, + TCA_HTB_RATE64, + TCA_HTB_CEIL64, + TCA_HTB_PAD, + __TCA_HTB_MAX, +}; + +#define TCA_HTB_MAX (__TCA_HTB_MAX - 1) + +struct tc_htb_xstats { + __u32 lends; + __u32 borrows; + __u32 giants; /* unused since 'Make HTB scheduler work with TSO.' */ + __s32 tokens; + __s32 ctokens; +}; + +/* HFSC section */ + +struct tc_hfsc_qopt { + __u16 defcls; /* default class */ +}; + +struct tc_service_curve { + __u32 m1; /* slope of the first segment in bps */ + __u32 d; /* x-projection of the first segment in us */ + __u32 m2; /* slope of the second segment in bps */ +}; + +struct tc_hfsc_stats { + __u64 work; /* total work done */ + __u64 rtwork; /* work done by real-time criteria */ + __u32 period; /* current period */ + __u32 level; /* class level in hierarchy */ +}; + +enum { + TCA_HFSC_UNSPEC, + TCA_HFSC_RSC, + TCA_HFSC_FSC, + TCA_HFSC_USC, + __TCA_HFSC_MAX, +}; + +#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1) + + +/* CBQ section */ + +#define TC_CBQ_MAXPRIO 8 +#define TC_CBQ_MAXLEVEL 8 +#define TC_CBQ_DEF_EWMA 5 + +struct tc_cbq_lssopt { + unsigned char change; + unsigned char flags; +#define TCF_CBQ_LSS_BOUNDED 1 +#define TCF_CBQ_LSS_ISOLATED 2 + unsigned char ewma_log; + unsigned char level; +#define TCF_CBQ_LSS_FLAGS 1 +#define TCF_CBQ_LSS_EWMA 2 +#define TCF_CBQ_LSS_MAXIDLE 4 +#define TCF_CBQ_LSS_MINIDLE 8 +#define TCF_CBQ_LSS_OFFTIME 0x10 +#define TCF_CBQ_LSS_AVPKT 0x20 + __u32 maxidle; + __u32 minidle; + __u32 offtime; + __u32 avpkt; +}; + +struct tc_cbq_wrropt { + unsigned char flags; + unsigned char priority; + unsigned char cpriority; + unsigned char __reserved; + __u32 allot; + __u32 weight; +}; + +struct tc_cbq_ovl { + unsigned char strategy; +#define TC_CBQ_OVL_CLASSIC 0 +#define TC_CBQ_OVL_DELAY 1 +#define TC_CBQ_OVL_LOWPRIO 2 +#define TC_CBQ_OVL_DROP 3 +#define TC_CBQ_OVL_RCLASSIC 4 + unsigned char priority2; + __u16 pad; + __u32 penalty; +}; + +struct tc_cbq_police { + unsigned char police; + unsigned char __res1; + unsigned short __res2; +}; + +struct tc_cbq_fopt { + __u32 split; + __u32 defmap; + __u32 defchange; +}; + +struct tc_cbq_xstats { + __u32 borrows; + __u32 overactions; + __s32 avgidle; + __s32 undertime; +}; + +enum { + TCA_CBQ_UNSPEC, + TCA_CBQ_LSSOPT, + TCA_CBQ_WRROPT, + TCA_CBQ_FOPT, + TCA_CBQ_OVL_STRATEGY, + TCA_CBQ_RATE, + TCA_CBQ_RTAB, + TCA_CBQ_POLICE, + __TCA_CBQ_MAX, +}; + +#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) + +/* dsmark section */ + +enum { + TCA_DSMARK_UNSPEC, + TCA_DSMARK_INDICES, + TCA_DSMARK_DEFAULT_INDEX, + TCA_DSMARK_SET_TC_INDEX, + TCA_DSMARK_MASK, + TCA_DSMARK_VALUE, + __TCA_DSMARK_MAX, +}; + +#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1) + +/* ATM section */ + +enum { + TCA_ATM_UNSPEC, + TCA_ATM_FD, /* file/socket descriptor */ + TCA_ATM_PTR, /* pointer to descriptor - later */ + TCA_ATM_HDR, /* LL header */ + TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ + TCA_ATM_ADDR, /* PVC address (for output only) */ + TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */ + __TCA_ATM_MAX, +}; + +#define TCA_ATM_MAX (__TCA_ATM_MAX - 1) + +/* Network emulator */ + +enum { + TCA_NETEM_UNSPEC, + TCA_NETEM_CORR, + TCA_NETEM_DELAY_DIST, + TCA_NETEM_REORDER, + TCA_NETEM_CORRUPT, + TCA_NETEM_LOSS, + TCA_NETEM_RATE, + TCA_NETEM_ECN, + TCA_NETEM_RATE64, + TCA_NETEM_PAD, + TCA_NETEM_LATENCY64, + TCA_NETEM_JITTER64, + TCA_NETEM_SLOT, + TCA_NETEM_SLOT_DIST, + __TCA_NETEM_MAX, +}; + +#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1) + +struct tc_netem_qopt { + __u32 latency; /* added delay (us) */ + __u32 limit; /* fifo limit (packets) */ + __u32 loss; /* random packet loss (0=none ~0=100%) */ + __u32 gap; /* re-ordering gap (0 for none) */ + __u32 duplicate; /* random packet dup (0=none ~0=100%) */ + __u32 jitter; /* random jitter in latency (us) */ +}; + +struct tc_netem_corr { + __u32 delay_corr; /* delay correlation */ + __u32 loss_corr; /* packet loss correlation */ + __u32 dup_corr; /* duplicate correlation */ +}; + +struct tc_netem_reorder { + __u32 probability; + __u32 correlation; +}; + +struct tc_netem_corrupt { + __u32 probability; + __u32 correlation; +}; + +struct tc_netem_rate { + __u32 rate; /* byte/s */ + __s32 packet_overhead; + __u32 cell_size; + __s32 cell_overhead; +}; + +struct tc_netem_slot { + __s64 min_delay; /* nsec */ + __s64 max_delay; + __s32 max_packets; + __s32 max_bytes; + __s64 dist_delay; /* nsec */ + __s64 dist_jitter; /* nsec */ +}; + +enum { + NETEM_LOSS_UNSPEC, + NETEM_LOSS_GI, /* General Intuitive - 4 state model */ + NETEM_LOSS_GE, /* Gilbert Elliot models */ + __NETEM_LOSS_MAX +}; +#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1) + +/* State transition probabilities for 4 state model */ +struct tc_netem_gimodel { + __u32 p13; + __u32 p31; + __u32 p32; + __u32 p14; + __u32 p23; +}; + +/* Gilbert-Elliot models */ +struct tc_netem_gemodel { + __u32 p; + __u32 r; + __u32 h; + __u32 k1; +}; + +#define NETEM_DIST_SCALE 8192 +#define NETEM_DIST_MAX 16384 + +/* DRR */ + +enum { + TCA_DRR_UNSPEC, + TCA_DRR_QUANTUM, + __TCA_DRR_MAX +}; + +#define TCA_DRR_MAX (__TCA_DRR_MAX - 1) + +struct tc_drr_stats { + __u32 deficit; +}; + +/* MQPRIO */ +#define TC_QOPT_BITMASK 15 +#define TC_QOPT_MAX_QUEUE 16 + +enum { + TC_MQPRIO_HW_OFFLOAD_NONE, /* no offload requested */ + TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */ + __TC_MQPRIO_HW_OFFLOAD_MAX +}; + +#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1) + +enum { + TC_MQPRIO_MODE_DCB, + TC_MQPRIO_MODE_CHANNEL, + __TC_MQPRIO_MODE_MAX +}; + +#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1) + +enum { + TC_MQPRIO_SHAPER_DCB, + TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */ + __TC_MQPRIO_SHAPER_MAX +}; + +#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1) + +struct tc_mqprio_qopt { + __u8 num_tc; + __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; + __u8 hw; + __u16 count[TC_QOPT_MAX_QUEUE]; + __u16 offset[TC_QOPT_MAX_QUEUE]; +}; + +#define TC_MQPRIO_F_MODE 0x1 +#define TC_MQPRIO_F_SHAPER 0x2 +#define TC_MQPRIO_F_MIN_RATE 0x4 +#define TC_MQPRIO_F_MAX_RATE 0x8 + +enum { + TCA_MQPRIO_UNSPEC, + TCA_MQPRIO_MODE, + TCA_MQPRIO_SHAPER, + TCA_MQPRIO_MIN_RATE64, + TCA_MQPRIO_MAX_RATE64, + __TCA_MQPRIO_MAX, +}; + +#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1) + +/* SFB */ + +enum { + TCA_SFB_UNSPEC, + TCA_SFB_PARMS, + __TCA_SFB_MAX, +}; + +#define TCA_SFB_MAX (__TCA_SFB_MAX - 1) + +/* + * Note: increment, decrement are Q0.16 fixed-point values. + */ +struct tc_sfb_qopt { + __u32 rehash_interval; /* delay between hash move, in ms */ + __u32 warmup_time; /* double buffering warmup time in ms (warmup_time < rehash_interval) */ + __u32 max; /* max len of qlen_min */ + __u32 bin_size; /* maximum queue length per bin */ + __u32 increment; /* probability increment, (d1 in Blue) */ + __u32 decrement; /* probability decrement, (d2 in Blue) */ + __u32 limit; /* max SFB queue length */ + __u32 penalty_rate; /* inelastic flows are rate limited to 'rate' pps */ + __u32 penalty_burst; +}; + +struct tc_sfb_xstats { + __u32 earlydrop; + __u32 penaltydrop; + __u32 bucketdrop; + __u32 queuedrop; + __u32 childdrop; /* drops in child qdisc */ + __u32 marked; + __u32 maxqlen; + __u32 maxprob; + __u32 avgprob; +}; + +#define SFB_MAX_PROB 0xFFFF + +/* QFQ */ +enum { + TCA_QFQ_UNSPEC, + TCA_QFQ_WEIGHT, + TCA_QFQ_LMAX, + __TCA_QFQ_MAX +}; + +#define TCA_QFQ_MAX (__TCA_QFQ_MAX - 1) + +struct tc_qfq_stats { + __u32 weight; + __u32 lmax; +}; + +/* CODEL */ + +enum { + TCA_CODEL_UNSPEC, + TCA_CODEL_TARGET, + TCA_CODEL_LIMIT, + TCA_CODEL_INTERVAL, + TCA_CODEL_ECN, + TCA_CODEL_CE_THRESHOLD, + __TCA_CODEL_MAX +}; + +#define TCA_CODEL_MAX (__TCA_CODEL_MAX - 1) + +struct tc_codel_xstats { + __u32 maxpacket; /* largest packet we've seen so far */ + __u32 count; /* how many drops we've done since the last time we + * entered dropping state + */ + __u32 lastcount; /* count at entry to dropping state */ + __u32 ldelay; /* in-queue delay seen by most recently dequeued packet */ + __s32 drop_next; /* time to drop next packet */ + __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */ + __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */ + __u32 dropping; /* are we in dropping state ? */ + __u32 ce_mark; /* number of CE marked packets because of ce_threshold */ +}; + +/* FQ_CODEL */ + +enum { + TCA_FQ_CODEL_UNSPEC, + TCA_FQ_CODEL_TARGET, + TCA_FQ_CODEL_LIMIT, + TCA_FQ_CODEL_INTERVAL, + TCA_FQ_CODEL_ECN, + TCA_FQ_CODEL_FLOWS, + TCA_FQ_CODEL_QUANTUM, + TCA_FQ_CODEL_CE_THRESHOLD, + TCA_FQ_CODEL_DROP_BATCH_SIZE, + TCA_FQ_CODEL_MEMORY_LIMIT, + __TCA_FQ_CODEL_MAX +}; + +#define TCA_FQ_CODEL_MAX (__TCA_FQ_CODEL_MAX - 1) + +enum { + TCA_FQ_CODEL_XSTATS_QDISC, + TCA_FQ_CODEL_XSTATS_CLASS, +}; + +struct tc_fq_codel_qd_stats { + __u32 maxpacket; /* largest packet we've seen so far */ + __u32 drop_overlimit; /* number of time max qdisc + * packet limit was hit + */ + __u32 ecn_mark; /* number of packets we ECN marked + * instead of being dropped + */ + __u32 new_flow_count; /* number of time packets + * created a 'new flow' + */ + __u32 new_flows_len; /* count of flows in new list */ + __u32 old_flows_len; /* count of flows in old list */ + __u32 ce_mark; /* packets above ce_threshold */ + __u32 memory_usage; /* in bytes */ + __u32 drop_overmemory; +}; + +struct tc_fq_codel_cl_stats { + __s32 deficit; + __u32 ldelay; /* in-queue delay seen by most recently + * dequeued packet + */ + __u32 count; + __u32 lastcount; + __u32 dropping; + __s32 drop_next; +}; + +struct tc_fq_codel_xstats { + __u32 type; + union { + struct tc_fq_codel_qd_stats qdisc_stats; + struct tc_fq_codel_cl_stats class_stats; + }; +}; + +/* FQ */ + +enum { + TCA_FQ_UNSPEC, + + TCA_FQ_PLIMIT, /* limit of total number of packets in queue */ + + TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */ + + TCA_FQ_QUANTUM, /* RR quantum */ + + TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */ + + TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ + + TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */ + + TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ + + TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ + + TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ + + TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ + + TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ + + TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ + + __TCA_FQ_MAX +}; + +#define TCA_FQ_MAX (__TCA_FQ_MAX - 1) + +struct tc_fq_qd_stats { + __u64 gc_flows; + __u64 highprio_packets; + __u64 tcp_retrans; + __u64 throttled; + __u64 flows_plimit; + __u64 pkts_too_long; + __u64 allocation_errors; + __s64 time_next_delayed_flow; + __u32 flows; + __u32 inactive_flows; + __u32 throttled_flows; + __u32 unthrottle_latency_ns; + __u64 ce_mark; /* packets above ce_threshold */ +}; + +/* Heavy-Hitter Filter */ + +enum { + TCA_HHF_UNSPEC, + TCA_HHF_BACKLOG_LIMIT, + TCA_HHF_QUANTUM, + TCA_HHF_HH_FLOWS_LIMIT, + TCA_HHF_RESET_TIMEOUT, + TCA_HHF_ADMIT_BYTES, + TCA_HHF_EVICT_TIMEOUT, + TCA_HHF_NON_HH_WEIGHT, + __TCA_HHF_MAX +}; + +#define TCA_HHF_MAX (__TCA_HHF_MAX - 1) + +struct tc_hhf_xstats { + __u32 drop_overlimit; /* number of times max qdisc packet limit + * was hit + */ + __u32 hh_overlimit; /* number of times max heavy-hitters was hit */ + __u32 hh_tot_count; /* number of captured heavy-hitters so far */ + __u32 hh_cur_count; /* number of current heavy-hitters */ +}; + +/* PIE */ +enum { + TCA_PIE_UNSPEC, + TCA_PIE_TARGET, + TCA_PIE_LIMIT, + TCA_PIE_TUPDATE, + TCA_PIE_ALPHA, + TCA_PIE_BETA, + TCA_PIE_ECN, + TCA_PIE_BYTEMODE, + __TCA_PIE_MAX +}; +#define TCA_PIE_MAX (__TCA_PIE_MAX - 1) + +struct tc_pie_xstats { + __u32 prob; /* current probability */ + __u32 delay; /* current delay in ms */ + __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */ + __u32 packets_in; /* total number of packets enqueued */ + __u32 dropped; /* packets dropped due to pie_action */ + __u32 overlimit; /* dropped due to lack of space in queue */ + __u32 maxq; /* maximum queue size */ + __u32 ecn_mark; /* packets marked with ecn*/ +}; + +/* CBS */ +struct tc_cbs_qopt { + __u8 offload; + __u8 _pad[3]; + __s32 hicredit; + __s32 locredit; + __s32 idleslope; + __s32 sendslope; +}; + +enum { + TCA_CBS_UNSPEC, + TCA_CBS_PARMS, + __TCA_CBS_MAX, +}; + +#define TCA_CBS_MAX (__TCA_CBS_MAX - 1) + + +/* ETF */ +struct tc_etf_qopt { + __s32 delta; + __s32 clockid; + __u32 flags; +#define TC_ETF_DEADLINE_MODE_ON BIT(0) +#define TC_ETF_OFFLOAD_ON BIT(1) +}; + +enum { + TCA_ETF_UNSPEC, + TCA_ETF_PARMS, + __TCA_ETF_MAX, +}; + +#define TCA_ETF_MAX (__TCA_ETF_MAX - 1) + + +/* CAKE */ +enum { + TCA_CAKE_UNSPEC, + TCA_CAKE_PAD, + TCA_CAKE_BASE_RATE64, + TCA_CAKE_DIFFSERV_MODE, + TCA_CAKE_ATM, + TCA_CAKE_FLOW_MODE, + TCA_CAKE_OVERHEAD, + TCA_CAKE_RTT, + TCA_CAKE_TARGET, + TCA_CAKE_AUTORATE, + TCA_CAKE_MEMORY, + TCA_CAKE_NAT, + TCA_CAKE_RAW, + TCA_CAKE_WASH, + TCA_CAKE_MPU, + TCA_CAKE_INGRESS, + TCA_CAKE_ACK_FILTER, + TCA_CAKE_SPLIT_GSO, + __TCA_CAKE_MAX +}; +#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) + +enum { + __TCA_CAKE_STATS_INVALID, + TCA_CAKE_STATS_PAD, + TCA_CAKE_STATS_CAPACITY_ESTIMATE64, + TCA_CAKE_STATS_MEMORY_LIMIT, + TCA_CAKE_STATS_MEMORY_USED, + TCA_CAKE_STATS_AVG_NETOFF, + TCA_CAKE_STATS_MIN_NETLEN, + TCA_CAKE_STATS_MAX_NETLEN, + TCA_CAKE_STATS_MIN_ADJLEN, + TCA_CAKE_STATS_MAX_ADJLEN, + TCA_CAKE_STATS_TIN_STATS, + TCA_CAKE_STATS_DEFICIT, + TCA_CAKE_STATS_COBALT_COUNT, + TCA_CAKE_STATS_DROPPING, + TCA_CAKE_STATS_DROP_NEXT_US, + TCA_CAKE_STATS_P_DROP, + TCA_CAKE_STATS_BLUE_TIMER_US, + __TCA_CAKE_STATS_MAX +}; +#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1) + +enum { + __TCA_CAKE_TIN_STATS_INVALID, + TCA_CAKE_TIN_STATS_PAD, + TCA_CAKE_TIN_STATS_SENT_PACKETS, + TCA_CAKE_TIN_STATS_SENT_BYTES64, + TCA_CAKE_TIN_STATS_DROPPED_PACKETS, + TCA_CAKE_TIN_STATS_DROPPED_BYTES64, + TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS, + TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64, + TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS, + TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64, + TCA_CAKE_TIN_STATS_BACKLOG_PACKETS, + TCA_CAKE_TIN_STATS_BACKLOG_BYTES, + TCA_CAKE_TIN_STATS_THRESHOLD_RATE64, + TCA_CAKE_TIN_STATS_TARGET_US, + TCA_CAKE_TIN_STATS_INTERVAL_US, + TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS, + TCA_CAKE_TIN_STATS_WAY_MISSES, + TCA_CAKE_TIN_STATS_WAY_COLLISIONS, + TCA_CAKE_TIN_STATS_PEAK_DELAY_US, + TCA_CAKE_TIN_STATS_AVG_DELAY_US, + TCA_CAKE_TIN_STATS_BASE_DELAY_US, + TCA_CAKE_TIN_STATS_SPARSE_FLOWS, + TCA_CAKE_TIN_STATS_BULK_FLOWS, + TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS, + TCA_CAKE_TIN_STATS_MAX_SKBLEN, + TCA_CAKE_TIN_STATS_FLOW_QUANTUM, + __TCA_CAKE_TIN_STATS_MAX +}; +#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1) +#define TC_CAKE_MAX_TINS (8) + +enum { + CAKE_FLOW_NONE = 0, + CAKE_FLOW_SRC_IP, + CAKE_FLOW_DST_IP, + CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */ + CAKE_FLOW_FLOWS, + CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */ + CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */ + CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */ + CAKE_FLOW_MAX, +}; + +enum { + CAKE_DIFFSERV_DIFFSERV3 = 0, + CAKE_DIFFSERV_DIFFSERV4, + CAKE_DIFFSERV_DIFFSERV8, + CAKE_DIFFSERV_BESTEFFORT, + CAKE_DIFFSERV_PRECEDENCE, + CAKE_DIFFSERV_MAX +}; + +enum { + CAKE_ACK_NONE = 0, + CAKE_ACK_FILTER, + CAKE_ACK_AGGRESSIVE, + CAKE_ACK_MAX +}; + +enum { + CAKE_ATM_NONE = 0, + CAKE_ATM_ATM, + CAKE_ATM_PTM, + CAKE_ATM_MAX +}; + + +/* TAPRIO */ +enum { + TC_TAPRIO_CMD_SET_GATES = 0x00, + TC_TAPRIO_CMD_SET_AND_HOLD = 0x01, + TC_TAPRIO_CMD_SET_AND_RELEASE = 0x02, +}; + +enum { + TCA_TAPRIO_SCHED_ENTRY_UNSPEC, + TCA_TAPRIO_SCHED_ENTRY_INDEX, /* u32 */ + TCA_TAPRIO_SCHED_ENTRY_CMD, /* u8 */ + TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, /* u32 */ + TCA_TAPRIO_SCHED_ENTRY_INTERVAL, /* u32 */ + __TCA_TAPRIO_SCHED_ENTRY_MAX, +}; +#define TCA_TAPRIO_SCHED_ENTRY_MAX (__TCA_TAPRIO_SCHED_ENTRY_MAX - 1) + +/* The format for schedule entry list is: + * [TCA_TAPRIO_SCHED_ENTRY_LIST] + * [TCA_TAPRIO_SCHED_ENTRY] + * [TCA_TAPRIO_SCHED_ENTRY_CMD] + * [TCA_TAPRIO_SCHED_ENTRY_GATES] + * [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] + */ +enum { + TCA_TAPRIO_SCHED_UNSPEC, + TCA_TAPRIO_SCHED_ENTRY, + __TCA_TAPRIO_SCHED_MAX, +}; + +#define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1) + +enum { + TCA_TAPRIO_ATTR_UNSPEC, + TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ + TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST, /* nested of entry */ + TCA_TAPRIO_ATTR_SCHED_BASE_TIME, /* s64 */ + TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */ + TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */ + TCA_TAPRIO_PAD, + __TCA_TAPRIO_ATTR_MAX, +}; + +#define TCA_TAPRIO_ATTR_MAX (__TCA_TAPRIO_ATTR_MAX - 1) + +#endif diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c @@ -65,6 +65,17 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } +static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) +{ + int fd; + + do { + fd = sys_bpf(BPF_PROG_LOAD, attr, size); + } while (fd < 0 && errno == EAGAIN); + + return fd; +} + int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0; @@ -232,7 +243,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, memcpy(attr.prog_name, load_attr->name, min(name_len, BPF_OBJ_NAME_LEN - 1)); - fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) return fd; @@ -269,7 +280,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, break; } - fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) goto done; @@ -283,7 +294,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.log_size = log_buf_sz; attr.log_level = 1; log_buf[0] = 0; - fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); done: free(finfo); free(linfo); @@ -328,7 +339,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.kern_version = kern_version; attr.prog_flags = prog_flags; - return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + return sys_bpf_prog_load(&attr, sizeof(attr)); } int bpf_map_update_elem(int fd, const void *key, const void *value, diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile @@ -56,6 +56,7 @@ TEST_PROGS := test_kmod.sh \ test_xdp_vlan.sh TEST_PROGS_EXTENDED := with_addr.sh \ + with_tunnels.sh \ tcp_client.py \ tcp_server.py diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c @@ -1188,7 +1188,9 @@ static void test_stacktrace_build_id(void) int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; int build_id_matches = 0; + int retry = 1; +retry: err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) goto out; @@ -1301,6 +1303,19 @@ static void test_stacktrace_build_id(void) previous_key = key; } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); + /* stack_map_get_build_id_offset() is racy and sometimes can return + * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID; + * try it one more time. + */ + if (build_id_matches < 1 && retry--) { + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); + close(pmu_fd); + bpf_object__close(obj); + printf("%s:WARN:Didn't find expected build ID from the map, retrying\n", + __func__); + goto retry; + } + if (CHECK(build_id_matches < 1, "build id match", "Didn't find expected build ID from the map\n")) goto disable_pmu; @@ -1341,7 +1356,9 @@ static void test_stacktrace_build_id_nmi(void) int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; int build_id_matches = 0; + int retry = 1; +retry: err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) return; @@ -1436,6 +1453,19 @@ static void test_stacktrace_build_id_nmi(void) previous_key = key; } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); + /* stack_map_get_build_id_offset() is racy and sometimes can return + * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID; + * try it one more time. + */ + if (build_id_matches < 1 && retry--) { + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); + close(pmu_fd); + bpf_object__close(obj); + printf("%s:WARN:Didn't find expected build ID from the map, retrying\n", + __func__); + goto retry; + } + if (CHECK(build_id_matches < 1, "build id match", "Didn't find expected build ID from the map\n")) goto disable_pmu; diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -25,6 +25,7 @@ ALL_TESTS=" lag_unlink_slaves_test lag_dev_deletion_test vlan_interface_uppers_test + bridge_extern_learn_test devlink_reload_test " NUM_NETIFS=2 @@ -541,6 +542,25 @@ vlan_interface_uppers_test() ip link del dev br0 } +bridge_extern_learn_test() +{ + # Test that externally learned entries added from user space are + # marked as offloaded + RET=0 + + ip link add name br0 type bridge + ip link set dev $swp1 master br0 + + bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn + + bridge fdb show brport $swp1 | grep de:ad:be:ef:13:37 | grep -q offload + check_err $? "fdb entry not marked as offloaded when should" + + log_test "externally learned fdb entry" + + ip link del dev br0 +} + devlink_reload_test() { # Test that after executing all the above configuration tests, a diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -109,6 +109,38 @@ vlan_deletion() ping_ipv6 } +extern_learn() +{ + local mac=de:ad:be:ef:13:37 + local ageing_time + + # Test that externally learned FDB entries can roam, but not age out + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn vlan 1 + + bridge fdb show brport $swp1 | grep -q de:ad:be:ef:13:37 + check_err $? "Did not find FDB entry when should" + + # Wait for 10 seconds after the ageing time to make sure the FDB entry + # was not aged out + ageing_time=$(bridge_ageing_time_get br0) + sleep $((ageing_time + 10)) + + bridge fdb show brport $swp1 | grep -q de:ad:be:ef:13:37 + check_err $? "FDB entry was aged out when should not" + + $MZ $h2 -c 1 -p 64 -a $mac -t ip -q + + bridge fdb show brport $swp2 | grep -q de:ad:be:ef:13:37 + check_err $? "FDB entry did not roam when should" + + log_test "Externally learned FDB entry - ageing & roaming" + + bridge fdb del de:ad:be:ef:13:37 dev $swp2 master vlan 1 &> /dev/null + bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -240,7 +240,7 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) cm->cmsg_type == IP_RECVERR) || (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) || - (cm->cmsg_level = SOL_PACKET && + (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)) { serr = (void *) CMSG_DATA(cm); if (serr->ee_errno != ENOMSG || diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json @@ -17,7 +17,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 2", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -41,7 +41,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 10 pipe index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark.*index 2", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use mark.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -65,7 +65,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark continue index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*allow mark.*index 2", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*allow mark.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -89,7 +89,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 789 drop index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*use mark 789.*index 2", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*use mark 789.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -113,7 +113,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 656768 reclassify index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 656768.*index 2", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use mark 656768.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -137,7 +137,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 65 jump 1 index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0xED3E.*use mark 65.*index 2", + "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0[xX]ED3E.*use mark 65.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -161,7 +161,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295 reclassify index 90", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 90", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 4294967295.*index 90", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use mark 4294967295.*index 90", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -185,7 +185,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295999 pipe index 90", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 90", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark 4294967295999.*index 90", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use mark 4294967295999.*index 90", "matchCount": "0", "teardown": [] }, @@ -207,7 +207,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow prio pass index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow prio.*index 9", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow prio.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -231,7 +231,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 7 pipe index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 7.*index 9", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use prio 7.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -255,7 +255,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 3 continue index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use prio 3.*index 9", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use prio 3.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -279,7 +279,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow prio drop index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow prio.*index 9", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*allow prio.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -303,7 +303,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 998877 reclassify index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 998877.*index 9", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 998877.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -327,7 +327,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 998877 jump 10 index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0xED3E.*use prio 998877.*index 9", + "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0[xX]ED3E.*use prio 998877.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -351,7 +351,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 4294967295 reclassify index 99", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 99", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 4294967295.*index 99", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 4294967295.*index 99", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -375,7 +375,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 4294967298 pipe index 99", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 99", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 4294967298.*index 99", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use prio 4294967298.*index 99", "matchCount": "0", "teardown": [] }, @@ -397,7 +397,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow tcindex.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -421,7 +421,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 111 pipe index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 111.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use tcindex 111.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -445,7 +445,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use tcindex 1.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -469,7 +469,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use tcindex 1.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -493,7 +493,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex drop index 77", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 77", - "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow tcindex.*index 77", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*allow tcindex.*index 77", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -517,7 +517,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex reclassify index 77", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 77", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*allow tcindex.*index 77", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*allow tcindex.*index 77", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -541,7 +541,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex jump 999 index 77", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 77", - "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0xED3E.*allow tcindex.*index 77", + "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0[xX]ED3E.*allow tcindex.*index 77", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -565,7 +565,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 65535 pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*use tcindex 65535.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*use tcindex 65535.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -589,7 +589,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 65539 pipe index 1", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 65539.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use tcindex 65539.*index 1", "matchCount": "0", "teardown": [] }, @@ -611,7 +611,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark src 00:11:22:33:44:55 pipe index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow mark src 00:11:22:33:44:55.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow mark src 00:11:22:33:44:55.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -635,7 +635,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 9876 dst 00:11:22:33:44:55 reclassify index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -659,7 +659,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex src 00:aa:bb:cc:dd:ee dst 00:11:22:33:44:55 pass index 11", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 11", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -683,7 +683,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 7 type 0xfefe pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xFEFE.*use mark 7.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]FEFE.*use mark 7.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -707,7 +707,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 444 type 0xabba pipe index 21", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 21", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xABBA.*use prio 444.*index 21", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ABBA.*use prio 444.*index 21", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -731,7 +731,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 5000 type 0xabcd reclassify index 21", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 21", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xABCD.*use tcindex 5000.*index 21", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ABCD.*use tcindex 5000.*index 21", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -739,7 +739,7 @@ }, { "id": "fac3", - "name": "Create valid ife encode action with index at 32-bit maximnum", + "name": "Create valid ife encode action with index at 32-bit maximum", "category": [ "actions", "ife" @@ -755,7 +755,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 4294967295", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 4294967295", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -779,7 +779,7 @@ "cmdUnderTest": "$TC actions add action ife decode pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action pass.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action pass.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -803,7 +803,7 @@ "cmdUnderTest": "$TC actions add action ife decode pipe index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -827,7 +827,7 @@ "cmdUnderTest": "$TC actions add action ife decode continue index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action continue.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action continue.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -851,7 +851,7 @@ "cmdUnderTest": "$TC actions add action ife decode drop index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action drop.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action drop.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -875,7 +875,7 @@ "cmdUnderTest": "$TC actions add action ife decode reclassify index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -899,7 +899,7 @@ "cmdUnderTest": "$TC actions add action ife decode jump 10 index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -923,7 +923,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295999", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 4294967295999", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295999", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 4294967295999", "matchCount": "0", "teardown": [] }, @@ -945,7 +945,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark kuka index 4", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 4", - "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0xED3E.*allow mark.*index 4", + "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0[xX]ED3E.*allow mark.*index 4", "matchCount": "0", "teardown": [] }, @@ -967,7 +967,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow prio pipe index 4 cookie aabbccddeeff112233445566778800a1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 4", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -991,7 +991,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow foo pipe index 4", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 4", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow foo.*index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow foo.*index 4", "matchCount": "0", "teardown": [] }, @@ -1013,7 +1013,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow prio type 70000 pipe index 4", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 4", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0x11170.*allow prio.*index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]11170.*allow prio.*index 4", "matchCount": "0", "teardown": [] }, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json @@ -82,35 +82,6 @@ ] }, { - "id": "ba4e", - "name": "Add tunnel_key set action with missing mandatory id parameter", - "category": [ - "actions", - "tunnel_key" - ], - "setup": [ - [ - "$TC actions flush action tunnel_key", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2", - "expExitCode": "255", - "verifyCmd": "$TC actions list action tunnel_key", - "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2", - "matchCount": "0", - "teardown": [ - [ - "$TC actions flush action tunnel_key", - 0, - 1, - 255 - ] - ] - }, - { "id": "a5e0", "name": "Add tunnel_key set action with invalid src_ip parameter", "category": [ @@ -634,7 +605,7 @@ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 index 4 cookie aa11bb22cc33dd44ee55ff66aa11b1b2", "expExitCode": "0", "verifyCmd": "$TC actions get action tunnel_key index 4", - "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*dst_port 0.*csum pipe.*index 4 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*csum pipe.*index 4 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2", "matchCount": "1", "teardown": [ "$TC actions flush action tunnel_key"