From e8f69b16ee776da88589b5271e3f46020efc8f6c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 17 Sep 2021 12:12:04 +0200
Subject: net: hso: fix muxed tty registration

If resource allocation and registration fail for a muxed tty device
(e.g. if there are no more minor numbers) the driver should not try to
deregister the never-registered (or already-deregistered) tty.

Fix up the error handling to avoid dereferencing a NULL pointer when
attempting to remove the character device.

Fixes: 72dc1c096c70 ("HSO: add option hso driver")
Cc: stable@vger.kernel.org	# 2.6.27
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/hso.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index a57251ba5991..f97813a4e8d1 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2719,14 +2719,14 @@ struct hso_device *hso_create_mux_serial_device(struct usb_interface *interface,
 
 	serial = kzalloc(sizeof(*serial), GFP_KERNEL);
 	if (!serial)
-		goto exit;
+		goto err_free_dev;
 
 	hso_dev->port_data.dev_serial = serial;
 	serial->parent = hso_dev;
 
 	if (hso_serial_common_create
 	    (serial, 1, CTRL_URB_RX_SIZE, CTRL_URB_TX_SIZE))
-		goto exit;
+		goto err_free_serial;
 
 	serial->tx_data_length--;
 	serial->write_data = hso_mux_serial_write_data;
@@ -2742,11 +2742,9 @@ struct hso_device *hso_create_mux_serial_device(struct usb_interface *interface,
 	/* done, return it */
 	return hso_dev;
 
-exit:
-	if (serial) {
-		tty_unregister_device(tty_drv, serial->minor);
-		kfree(serial);
-	}
+err_free_serial:
+	kfree(serial);
+err_free_dev:
 	kfree(hso_dev);
 	return NULL;
 
-- 
cgit v1.2.3


From 3c9cfb5269f76d447dbadb67835368f3111a91d7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 17 Sep 2021 14:17:35 +0300
Subject: net: update NXP copyright text

NXP Legal insists that the following are not fine:

- Saying "NXP Semiconductors" instead of "NXP", since the company's
  registered name is "NXP"

- Putting a "(c)" sign in the copyright string

- Putting a comma in the copyright string

The only accepted copyright string format is "Copyright <year-range> NXP".

This patch changes the copyright headers in the networking files that
were sent by me, or derived from code sent by me.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c                                 | 2 +-
 drivers/net/dsa/ocelot/felix.h                                 | 2 +-
 drivers/net/dsa/ocelot/felix_vsc9959.c                         | 2 +-
 drivers/net/dsa/sja1105/sja1105_clocking.c                     | 2 +-
 drivers/net/dsa/sja1105/sja1105_devlink.c                      | 2 +-
 drivers/net/dsa/sja1105/sja1105_flower.c                       | 2 +-
 drivers/net/dsa/sja1105/sja1105_mdio.c                         | 2 +-
 drivers/net/dsa/sja1105/sja1105_spi.c                          | 2 +-
 drivers/net/dsa/sja1105/sja1105_static_config.c                | 2 +-
 drivers/net/dsa/sja1105/sja1105_static_config.h                | 2 +-
 drivers/net/dsa/sja1105/sja1105_vl.c                           | 2 +-
 drivers/net/dsa/sja1105/sja1105_vl.h                           | 2 +-
 drivers/net/ethernet/freescale/enetc/enetc_ierb.c              | 2 +-
 drivers/net/ethernet/freescale/enetc/enetc_ierb.h              | 2 +-
 drivers/net/ethernet/mscc/ocelot_devlink.c                     | 2 +-
 drivers/net/ethernet/mscc/ocelot_mrp.c                         | 2 +-
 drivers/net/ethernet/mscc/ocelot_net.c                         | 2 +-
 drivers/net/pcs/pcs-xpcs-nxp.c                                 | 2 +-
 include/linux/dsa/ocelot.h                                     | 2 +-
 include/linux/packing.h                                        | 2 +-
 lib/packing.c                                                  | 2 +-
 net/dsa/tag_ocelot.c                                           | 2 +-
 net/dsa/tag_ocelot_8021q.c                                     | 2 +-
 tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh | 2 +-
 24 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 3656e67af789..a3a9636430d6 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2019-2021 NXP Semiconductors
+/* Copyright 2019-2021 NXP
  *
  * This is an umbrella module for all network switches that are
  * register-compatible with Ocelot and that perform I/O to their host CPU
diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h
index 5854bab43327..54024b6f9498 100644
--- a/drivers/net/dsa/ocelot/felix.h
+++ b/drivers/net/dsa/ocelot/felix.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright 2019 NXP Semiconductors
+/* Copyright 2019 NXP
  */
 #ifndef _MSCC_FELIX_H
 #define _MSCC_FELIX_H
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index f966a253d1c7..9e2ac8e46619 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
 /* Copyright 2017 Microsemi Corporation
- * Copyright 2018-2019 NXP Semiconductors
+ * Copyright 2018-2019 NXP
  */
 #include <linux/fsl/enetc_mdio.h>
 #include <soc/mscc/ocelot_qsys.h>
diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c
index 387a1f2f161c..5bbf1707f2af 100644
--- a/drivers/net/dsa/sja1105/sja1105_clocking.c
+++ b/drivers/net/dsa/sja1105/sja1105_clocking.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: BSD-3-Clause
-/* Copyright (c) 2016-2018, NXP Semiconductors
+/* Copyright 2016-2018 NXP
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #include <linux/packing.h>
diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c
index 05c7f4ca3b1a..0569ff066634 100644
--- a/drivers/net/dsa/sja1105/sja1105_devlink.c
+++ b/drivers/net/dsa/sja1105/sja1105_devlink.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
- * Copyright 2020 NXP Semiconductors
+ * Copyright 2020 NXP
  */
 #include "sja1105.h"
 
diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c
index 6c10ffa968ce..72b9b39b0989 100644
--- a/drivers/net/dsa/sja1105/sja1105_flower.c
+++ b/drivers/net/dsa/sja1105/sja1105_flower.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2020, NXP Semiconductors
+/* Copyright 2020 NXP
  */
 #include "sja1105.h"
 #include "sja1105_vl.h"
diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
index 705d3900e43a..215dd17ca790 100644
--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
+++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2021, NXP Semiconductors
+/* Copyright 2021 NXP
  */
 #include <linux/pcs/pcs-xpcs.h>
 #include <linux/of_mdio.h>
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index d60a530d0272..d3c9ad6d39d4 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: BSD-3-Clause
-/* Copyright (c) 2016-2018, NXP Semiconductors
+/* Copyright 2016-2018 NXP
  * Copyright (c) 2018, Sensor-Technik Wiedemann GmbH
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index 7a422ef4deb6..baba204ad62f 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: BSD-3-Clause
-/* Copyright (c) 2016-2018, NXP Semiconductors
+/* Copyright 2016-2018 NXP
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #include "sja1105_static_config.h"
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index bce0f5c03d0b..6a372d5f22ae 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright (c) 2016-2018, NXP Semiconductors
+/* Copyright 2016-2018 NXP
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #ifndef _SJA1105_STATIC_CONFIG_H
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c
index ec7b65daec20..6802f4057cc0 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.c
+++ b/drivers/net/dsa/sja1105/sja1105_vl.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2020, NXP Semiconductors
+/* Copyright 2020 NXP
  */
 #include <net/tc_act/tc_gate.h>
 #include <linux/dsa/8021q.h>
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.h b/drivers/net/dsa/sja1105/sja1105_vl.h
index 173d78963fed..51fba0dce91a 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.h
+++ b/drivers/net/dsa/sja1105/sja1105_vl.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright 2020, NXP Semiconductors
+/* Copyright 2020 NXP
  */
 #ifndef _SJA1105_VL_H
 #define _SJA1105_VL_H
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
index ee1468e3eaa3..91f02c505028 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
-/* Copyright 2021 NXP Semiconductors
+/* Copyright 2021 NXP
  *
  * The Integrated Endpoint Register Block (IERB) is configured by pre-boot
  * software and is supposed to be to ENETC what a NVRAM is to a 'real' PCIe
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.h b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h
index b3b774e0998a..c2ce47c4be9f 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ierb.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
-/* Copyright 2021 NXP Semiconductors */
+/* Copyright 2021 NXP */
 
 #include <linux/pci.h>
 #include <linux/platform_device.h>
diff --git a/drivers/net/ethernet/mscc/ocelot_devlink.c b/drivers/net/ethernet/mscc/ocelot_devlink.c
index edafbd37d12c..b8737efd2a85 100644
--- a/drivers/net/ethernet/mscc/ocelot_devlink.c
+++ b/drivers/net/ethernet/mscc/ocelot_devlink.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
-/* Copyright 2020-2021 NXP Semiconductors
+/* Copyright 2020-2021 NXP
  */
 #include <net/devlink.h>
 #include "ocelot.h"
diff --git a/drivers/net/ethernet/mscc/ocelot_mrp.c b/drivers/net/ethernet/mscc/ocelot_mrp.c
index 08b481a93460..4b0941f09f71 100644
--- a/drivers/net/ethernet/mscc/ocelot_mrp.c
+++ b/drivers/net/ethernet/mscc/ocelot_mrp.c
@@ -2,7 +2,7 @@
 /* Microsemi Ocelot Switch driver
  *
  * Copyright (c) 2017, 2019 Microsemi Corporation
- * Copyright 2020-2021 NXP Semiconductors
+ * Copyright 2020-2021 NXP
  */
 
 #include <linux/if_bridge.h>
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index c0c465a4a981..e54b9fb2a97a 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -5,7 +5,7 @@
  * mscc_ocelot_switch_lib.
  *
  * Copyright (c) 2017, 2019 Microsemi Corporation
- * Copyright 2020-2021 NXP Semiconductors
+ * Copyright 2020-2021 NXP
  */
 
 #include <linux/if_bridge.h>
diff --git a/drivers/net/pcs/pcs-xpcs-nxp.c b/drivers/net/pcs/pcs-xpcs-nxp.c
index 984c9f7f16a8..d16fc58cd48d 100644
--- a/drivers/net/pcs/pcs-xpcs-nxp.c
+++ b/drivers/net/pcs/pcs-xpcs-nxp.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2021 NXP Semiconductors
+/* Copyright 2021 NXP
  */
 #include <linux/pcs/pcs-xpcs.h>
 #include "pcs-xpcs.h"
diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
index c6bc45ae5e03..435777a0073c 100644
--- a/include/linux/dsa/ocelot.h
+++ b/include/linux/dsa/ocelot.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0
- * Copyright 2019-2021 NXP Semiconductors
+ * Copyright 2019-2021 NXP
  */
 
 #ifndef _NET_DSA_TAG_OCELOT_H
diff --git a/include/linux/packing.h b/include/linux/packing.h
index 54667735cc67..8d6571feb95d 100644
--- a/include/linux/packing.h
+++ b/include/linux/packing.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright (c) 2016-2018, NXP Semiconductors
+ * Copyright 2016-2018 NXP
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #ifndef _LINUX_PACKING_H
diff --git a/lib/packing.c b/lib/packing.c
index 6ed72dccfdb5..9a72f4bbf0e2 100644
--- a/lib/packing.c
+++ b/lib/packing.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
-/* Copyright (c) 2016-2018, NXP Semiconductors
+/* Copyright 2016-2018 NXP
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #include <linux/packing.h>
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index d37ab98e7fe1..8025ed778d33 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2019 NXP Semiconductors
+/* Copyright 2019 NXP
  */
 #include <linux/dsa/ocelot.h>
 #include <soc/mscc/ocelot.h>
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 3038a257ba05..59072930cb02 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2020-2021 NXP Semiconductors
+/* Copyright 2020-2021 NXP
  *
  * An implementation of the software-defined tag_8021q.c tagger format, which
  * also preserves full functionality under a vlan_filtering bridge. It does
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index beee0d5646a6..f7d84549cc3e 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
-# Copyright 2020 NXP Semiconductors
+# Copyright 2020 NXP
 
 WAIT_TIME=1
 NUM_NETIFS=4
-- 
cgit v1.2.3


From 02319bf15acf54004216e40ac9c171437f24be24 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 16 Sep 2021 14:33:35 -0700
Subject: net: dsa: bcm_sf2: Fix array overrun in bcm_sf2_num_active_ports()

After d12e1c464988 ("net: dsa: b53: Set correct number of ports in the
DSA struct") we stopped setting dsa_switch::num_ports to DSA_MAX_PORTS,
which created an off-by-one mismatch between the statically allocated
bcm_sf2_priv::port_sts array (of size DSA_MAX_PORTS) and
dsa_switch::num_ports. When dsa_is_cpu_port() is used, we end up
accessing an out-of-bounds member and causing a NULL pointer
dereference (NPD).

Fix this by iterating with the appropriate port count using
ds->num_ports.

Fixes: d12e1c464988 ("net: dsa: b53: Set correct number of ports in the DSA struct")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 6ce9ec1283e0..b6c4b3adb171 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -68,7 +68,7 @@ static unsigned int bcm_sf2_num_active_ports(struct dsa_switch *ds)
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	unsigned int port, count = 0;
 
-	for (port = 0; port < ARRAY_SIZE(priv->port_sts); port++) {
+	for (port = 0; port < ds->num_ports; port++) {
 		if (dsa_is_cpu_port(ds, port))
 			continue;
 		if (priv->port_sts[port].enabled)
-- 
cgit v1.2.3


From cf9579976f724ad517cc15b7caadea728c7e245c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 17 Sep 2021 16:34:32 +0300
Subject: net: mdio: introduce a shutdown method to mdio device drivers

MDIO-attached devices might have interrupts and other things that might
need to be quiesced when we kexec into a new kernel. Things are even
more creepy when those interrupt lines are shared, and in that case it
is absolutely mandatory to disable all interrupt sources.

Moreover, MDIO devices might be DSA switches, and DSA needs its own
shutdown method to unlink from the DSA master, which is a new
requirement that appeared after commit 2f1e8ea726e9 ("net: dsa: link
interfaces with the DSA master to get rid of lockdep warnings").

So introduce a ->shutdown method in the MDIO device driver structure.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_device.c | 11 +++++++++++
 include/linux/mdio.h          |  3 +++
 2 files changed, 14 insertions(+)

diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c
index c94cb5382dc9..250742ffdfd9 100644
--- a/drivers/net/phy/mdio_device.c
+++ b/drivers/net/phy/mdio_device.c
@@ -179,6 +179,16 @@ static int mdio_remove(struct device *dev)
 	return 0;
 }
 
+static void mdio_shutdown(struct device *dev)
+{
+	struct mdio_device *mdiodev = to_mdio_device(dev);
+	struct device_driver *drv = mdiodev->dev.driver;
+	struct mdio_driver *mdiodrv = to_mdio_driver(drv);
+
+	if (mdiodrv->shutdown)
+		mdiodrv->shutdown(mdiodev);
+}
+
 /**
  * mdio_driver_register - register an mdio_driver with the MDIO layer
  * @drv: new mdio_driver to register
@@ -193,6 +203,7 @@ int mdio_driver_register(struct mdio_driver *drv)
 	mdiodrv->driver.bus = &mdio_bus_type;
 	mdiodrv->driver.probe = mdio_probe;
 	mdiodrv->driver.remove = mdio_remove;
+	mdiodrv->driver.shutdown = mdio_shutdown;
 
 	retval = driver_register(&mdiodrv->driver);
 	if (retval) {
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index ffb787d5ebde..5e6dc38f418e 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -80,6 +80,9 @@ struct mdio_driver {
 
 	/* Clears up any memory if needed */
 	void (*remove)(struct mdio_device *mdiodev);
+
+	/* Quiesces the device on system shutdown, turns off interrupts etc */
+	void (*shutdown)(struct mdio_device *mdiodev);
 };
 
 static inline struct mdio_driver *
-- 
cgit v1.2.3


From 0650bf52b31ff35dc6430fc2e37969c36baba724 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 17 Sep 2021 16:34:33 +0300
Subject: net: dsa: be compatible with masters which unregister on shutdown

Lino reports that on his system with bcmgenet as DSA master and KSZ9897
as a switch, rebooting or shutting down never works properly.

What is special about the bcmgenet driver that triggers this, and that
other DSA masters do not have? It has an implementation of ->shutdown
which simply calls its ->remove implementation. In other words, it
unregisters its network interface on shutdown.

This message can be seen in a loop, and it hangs the reboot process there:

unregister_netdevice: waiting for eth0 to become free. Usage count = 3

So why 3?

A usage count of 1 is normal for a registered network interface, and any
virtual interface which links itself as an upper of that will increment
it via dev_hold. In the case of DSA, this is the call path:

dsa_slave_create
-> netdev_upper_dev_link
   -> __netdev_upper_dev_link
      -> __netdev_adjacent_dev_insert
         -> dev_hold

So a DSA switch with 3 interfaces will result in a usage count elevated
by two, and netdev_wait_allrefs will wait until they have gone away.

Other stacked interfaces, like VLAN, watch NETDEV_UNREGISTER events and
delete themselves, but DSA cannot just vanish and go poof, at most it
can unbind itself from the switch devices, but that must happen strictly
earlier compared to when the DSA master unregisters its net_device, so
reacting on the NETDEV_UNREGISTER event is way too late.

It seems that it is a pretty established pattern to have a driver's
->shutdown hook redirect to its ->remove hook, so the same code is
executed regardless of whether the driver is unbound from the device, or
the system is just shutting down. As Florian puts it, it is quite a big
hammer for bcmgenet to unregister its net_device during shutdown, but
having a common code path with the driver unbind helps ensure it is well
tested.

So DSA, for better or for worse, has to live with that and engage in an
arms race of implementing the ->shutdown hook too, from all individual
drivers, and do something sane when paired with masters that unregister
their net_device there. The only sane thing to do, of course, is to
unlink from the master.

However, complications arise really quickly.

The pattern of redirecting ->shutdown to ->remove is not unique to
bcmgenet or even to net_device drivers. In fact, SPI controllers do it
too (see dspi_shutdown -> dspi_remove), and presumably, I2C controllers
and MDIO controllers do it too (this is something I have not researched
too deeply, but even if this is not the case today, it is certainly
plausible to happen in the future, and must be taken into consideration).

Since DSA switches might be SPI devices, I2C devices, MDIO devices, the
insane implication is that for the exact same DSA switch device, we
might have both ->shutdown and ->remove getting called.

So we need to do something with that insane environment. The pattern
I've come up with is "if this, then not that", so if either ->shutdown
or ->remove gets called, we set the device's drvdata to NULL, and in the
other hook, we check whether the drvdata is NULL and just do nothing.
This is probably not necessary for platform devices, just for devices on
buses, but I would really insist on consistency among drivers, because
when code is copy-pasted, it is not always copy-pasted from the best
sources.

So depending on whether the DSA switch's ->remove or ->shutdown will get
called first, we cannot really guarantee even for the same driver if
rebooting will result in the same code path on all platforms. But
nonetheless, we need to do something minimally reasonable on ->shutdown
too to fix the bug. Of course, the ->remove will do more (a full
teardown of the tree, with all data structures freed, and this is why
the bug was not caught for so long). The new ->shutdown method is kept
separate from dsa_unregister_switch not because we couldn't have
unregistered the switch, but simply in the interest of doing something
quick and to the point.

The big question is: does the DSA switch's ->shutdown get called earlier
than the DSA master's ->shutdown? If not, there is still a risk that we
might trigger the WARN_ON in unregister_netdevice that says we are
attempting to unregister a net_device which has uppers. That's no good.
Although the reference to the master net_device won't physically go away
even if DSA's ->shutdown comes afterwards, remember we have a dev_hold
on it.

The answer to that question lies in this comment above device_link_add:

 * A side effect of the link creation is re-ordering of dpm_list and the
 * devices_kset list by moving the consumer device and all devices depending
 * on it to the ends of these lists (that does not happen to devices that have
 * not been registered when this function is called).

so the fact that DSA uses device_link_add towards its master is not
exactly for nothing. device_shutdown() walks devices_kset from the back,
so this is our guarantee that DSA's shutdown happens before the master's
shutdown.

Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings")
Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/
Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_mdio.c             | 21 +++++++++++--
 drivers/net/dsa/b53/b53_mmap.c             | 13 ++++++++
 drivers/net/dsa/b53/b53_priv.h             |  5 +++
 drivers/net/dsa/b53/b53_spi.c              | 13 ++++++++
 drivers/net/dsa/b53/b53_srab.c             | 21 +++++++++++--
 drivers/net/dsa/bcm_sf2.c                  | 12 +++++++
 drivers/net/dsa/dsa_loop.c                 | 22 ++++++++++++-
 drivers/net/dsa/lan9303-core.c             |  6 ++++
 drivers/net/dsa/lan9303.h                  |  1 +
 drivers/net/dsa/lan9303_i2c.c              | 24 +++++++++++---
 drivers/net/dsa/lan9303_mdio.c             | 15 +++++++++
 drivers/net/dsa/lantiq_gswip.c             | 18 +++++++++++
 drivers/net/dsa/microchip/ksz8795_spi.c    | 11 ++++++-
 drivers/net/dsa/microchip/ksz9477_i2c.c    | 14 +++++++--
 drivers/net/dsa/microchip/ksz9477_spi.c    |  8 +++--
 drivers/net/dsa/mt7530.c                   | 18 +++++++++++
 drivers/net/dsa/mv88e6060.c                | 18 +++++++++++
 drivers/net/dsa/mv88e6xxx/chip.c           | 22 ++++++++++++-
 drivers/net/dsa/ocelot/felix_vsc9959.c     | 20 ++++++++++--
 drivers/net/dsa/ocelot/seville_vsc9953.c   | 20 ++++++++++--
 drivers/net/dsa/qca/ar9331.c               | 18 +++++++++++
 drivers/net/dsa/qca8k.c                    | 18 +++++++++++
 drivers/net/dsa/realtek-smi-core.c         | 20 +++++++++++-
 drivers/net/dsa/sja1105/sja1105_main.c     | 21 +++++++++++--
 drivers/net/dsa/vitesse-vsc73xx-core.c     |  6 ++++
 drivers/net/dsa/vitesse-vsc73xx-platform.c | 22 ++++++++++++-
 drivers/net/dsa/vitesse-vsc73xx-spi.c      | 22 ++++++++++++-
 drivers/net/dsa/vitesse-vsc73xx.h          |  1 +
 include/net/dsa.h                          |  1 +
 net/dsa/dsa2.c                             | 50 ++++++++++++++++++++++++++++++
 30 files changed, 457 insertions(+), 24 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c
index a533a90e3904..a7aeb3c132c9 100644
--- a/drivers/net/dsa/b53/b53_mdio.c
+++ b/drivers/net/dsa/b53/b53_mdio.c
@@ -351,9 +351,25 @@ static int b53_mdio_probe(struct mdio_device *mdiodev)
 static void b53_mdio_remove(struct mdio_device *mdiodev)
 {
 	struct b53_device *dev = dev_get_drvdata(&mdiodev->dev);
-	struct dsa_switch *ds = dev->ds;
 
-	dsa_unregister_switch(ds);
+	if (!dev)
+		return;
+
+	b53_switch_remove(dev);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static void b53_mdio_shutdown(struct mdio_device *mdiodev)
+{
+	struct b53_device *dev = dev_get_drvdata(&mdiodev->dev);
+
+	if (!dev)
+		return;
+
+	b53_switch_shutdown(dev);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
 }
 
 static const struct of_device_id b53_of_match[] = {
@@ -373,6 +389,7 @@ MODULE_DEVICE_TABLE(of, b53_of_match);
 static struct mdio_driver b53_mdio_driver = {
 	.probe	= b53_mdio_probe,
 	.remove	= b53_mdio_remove,
+	.shutdown = b53_mdio_shutdown,
 	.mdiodrv.driver = {
 		.name = "bcm53xx",
 		.of_match_table = b53_of_match,
diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c
index 82680e083cc2..ae4c79d39bc0 100644
--- a/drivers/net/dsa/b53/b53_mmap.c
+++ b/drivers/net/dsa/b53/b53_mmap.c
@@ -316,9 +316,21 @@ static int b53_mmap_remove(struct platform_device *pdev)
 	if (dev)
 		b53_switch_remove(dev);
 
+	platform_set_drvdata(pdev, NULL);
+
 	return 0;
 }
 
+static void b53_mmap_shutdown(struct platform_device *pdev)
+{
+	struct b53_device *dev = platform_get_drvdata(pdev);
+
+	if (dev)
+		b53_switch_shutdown(dev);
+
+	platform_set_drvdata(pdev, NULL);
+}
+
 static const struct of_device_id b53_mmap_of_table[] = {
 	{ .compatible = "brcm,bcm3384-switch" },
 	{ .compatible = "brcm,bcm6328-switch" },
@@ -331,6 +343,7 @@ MODULE_DEVICE_TABLE(of, b53_mmap_of_table);
 static struct platform_driver b53_mmap_driver = {
 	.probe = b53_mmap_probe,
 	.remove = b53_mmap_remove,
+	.shutdown = b53_mmap_shutdown,
 	.driver = {
 		.name = "b53-switch",
 		.of_match_table = b53_mmap_of_table,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 5d068acf7cf8..959a52d41f0a 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -228,6 +228,11 @@ static inline void b53_switch_remove(struct b53_device *dev)
 	dsa_unregister_switch(dev->ds);
 }
 
+static inline void b53_switch_shutdown(struct b53_device *dev)
+{
+	dsa_switch_shutdown(dev->ds);
+}
+
 #define b53_build_op(type_op_size, val_type)				\
 static inline int b53_##type_op_size(struct b53_device *dev, u8 page,	\
 				     u8 reg, val_type val)		\
diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c
index ecb9f7f6b335..01e37b75471e 100644
--- a/drivers/net/dsa/b53/b53_spi.c
+++ b/drivers/net/dsa/b53/b53_spi.c
@@ -321,9 +321,21 @@ static int b53_spi_remove(struct spi_device *spi)
 	if (dev)
 		b53_switch_remove(dev);
 
+	spi_set_drvdata(spi, NULL);
+
 	return 0;
 }
 
+static void b53_spi_shutdown(struct spi_device *spi)
+{
+	struct b53_device *dev = spi_get_drvdata(spi);
+
+	if (dev)
+		b53_switch_shutdown(dev);
+
+	spi_set_drvdata(spi, NULL);
+}
+
 static const struct of_device_id b53_spi_of_match[] = {
 	{ .compatible = "brcm,bcm5325" },
 	{ .compatible = "brcm,bcm5365" },
@@ -344,6 +356,7 @@ static struct spi_driver b53_spi_driver = {
 	},
 	.probe	= b53_spi_probe,
 	.remove	= b53_spi_remove,
+	.shutdown = b53_spi_shutdown,
 };
 
 module_spi_driver(b53_spi_driver);
diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
index 3f4249de70c5..4591bb1c05d2 100644
--- a/drivers/net/dsa/b53/b53_srab.c
+++ b/drivers/net/dsa/b53/b53_srab.c
@@ -629,17 +629,34 @@ static int b53_srab_probe(struct platform_device *pdev)
 static int b53_srab_remove(struct platform_device *pdev)
 {
 	struct b53_device *dev = platform_get_drvdata(pdev);
-	struct b53_srab_priv *priv = dev->priv;
 
-	b53_srab_intr_set(priv, false);
+	if (!dev)
+		return 0;
+
+	b53_srab_intr_set(dev->priv, false);
 	b53_switch_remove(dev);
 
+	platform_set_drvdata(pdev, NULL);
+
 	return 0;
 }
 
+static void b53_srab_shutdown(struct platform_device *pdev)
+{
+	struct b53_device *dev = platform_get_drvdata(pdev);
+
+	if (!dev)
+		return;
+
+	b53_switch_shutdown(dev);
+
+	platform_set_drvdata(pdev, NULL);
+}
+
 static struct platform_driver b53_srab_driver = {
 	.probe = b53_srab_probe,
 	.remove = b53_srab_remove,
+	.shutdown = b53_srab_shutdown,
 	.driver = {
 		.name = "b53-srab-switch",
 		.of_match_table = b53_srab_of_match,
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index b6c4b3adb171..7578a5c38df5 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1512,6 +1512,9 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev)
 {
 	struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
 
+	if (!priv)
+		return 0;
+
 	priv->wol_ports_mask = 0;
 	/* Disable interrupts */
 	bcm_sf2_intr_disable(priv);
@@ -1523,6 +1526,8 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev)
 	if (priv->type == BCM7278_DEVICE_ID)
 		reset_control_assert(priv->rcdev);
 
+	platform_set_drvdata(pdev, NULL);
+
 	return 0;
 }
 
@@ -1530,6 +1535,9 @@ static void bcm_sf2_sw_shutdown(struct platform_device *pdev)
 {
 	struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
 
+	if (!priv)
+		return;
+
 	/* For a kernel about to be kexec'd we want to keep the GPHY on for a
 	 * successful MDIO bus scan to occur. If we did turn off the GPHY
 	 * before (e.g: port_disable), this will also power it back on.
@@ -1538,6 +1546,10 @@ static void bcm_sf2_sw_shutdown(struct platform_device *pdev)
 	 */
 	if (priv->hw_params.num_gphy == 1)
 		bcm_sf2_gphy_enable_set(priv->dev->ds, true);
+
+	dsa_switch_shutdown(priv->dev->ds);
+
+	platform_set_drvdata(pdev, NULL);
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index bfdf3324aac3..e638e3eea911 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -340,10 +340,29 @@ static int dsa_loop_drv_probe(struct mdio_device *mdiodev)
 static void dsa_loop_drv_remove(struct mdio_device *mdiodev)
 {
 	struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
-	struct dsa_loop_priv *ps = ds->priv;
+	struct dsa_loop_priv *ps;
+
+	if (!ds)
+		return;
+
+	ps = ds->priv;
 
 	dsa_unregister_switch(ds);
 	dev_put(ps->netdev);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static void dsa_loop_drv_shutdown(struct mdio_device *mdiodev)
+{
+	struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
+
+	if (!ds)
+		return;
+
+	dsa_switch_shutdown(ds);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
 }
 
 static struct mdio_driver dsa_loop_drv = {
@@ -352,6 +371,7 @@ static struct mdio_driver dsa_loop_drv = {
 	},
 	.probe	= dsa_loop_drv_probe,
 	.remove	= dsa_loop_drv_remove,
+	.shutdown = dsa_loop_drv_shutdown,
 };
 
 #define NUM_FIXED_PHYS	(DSA_LOOP_NUM_PORTS - 2)
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index d7ce281570b5..89f920289ae2 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -1379,6 +1379,12 @@ int lan9303_remove(struct lan9303 *chip)
 }
 EXPORT_SYMBOL(lan9303_remove);
 
+void lan9303_shutdown(struct lan9303 *chip)
+{
+	dsa_switch_shutdown(chip->ds);
+}
+EXPORT_SYMBOL(lan9303_shutdown);
+
 MODULE_AUTHOR("Juergen Borleis <kernel@pengutronix.de>");
 MODULE_DESCRIPTION("Core driver for SMSC/Microchip LAN9303 three port ethernet switch");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/dsa/lan9303.h b/drivers/net/dsa/lan9303.h
index 11f590b64701..c7f73efa50f0 100644
--- a/drivers/net/dsa/lan9303.h
+++ b/drivers/net/dsa/lan9303.h
@@ -10,3 +10,4 @@ extern const struct lan9303_phy_ops lan9303_indirect_phy_ops;
 
 int lan9303_probe(struct lan9303 *chip, struct device_node *np);
 int lan9303_remove(struct lan9303 *chip);
+void lan9303_shutdown(struct lan9303 *chip);
diff --git a/drivers/net/dsa/lan9303_i2c.c b/drivers/net/dsa/lan9303_i2c.c
index 9bffaef65a04..8ca4713310fa 100644
--- a/drivers/net/dsa/lan9303_i2c.c
+++ b/drivers/net/dsa/lan9303_i2c.c
@@ -67,13 +67,28 @@ static int lan9303_i2c_probe(struct i2c_client *client,
 
 static int lan9303_i2c_remove(struct i2c_client *client)
 {
-	struct lan9303_i2c *sw_dev;
+	struct lan9303_i2c *sw_dev = i2c_get_clientdata(client);
 
-	sw_dev = i2c_get_clientdata(client);
 	if (!sw_dev)
-		return -ENODEV;
+		return 0;
+
+	lan9303_remove(&sw_dev->chip);
+
+	i2c_set_clientdata(client, NULL);
+
+	return 0;
+}
+
+static void lan9303_i2c_shutdown(struct i2c_client *client)
+{
+	struct lan9303_i2c *sw_dev = i2c_get_clientdata(client);
+
+	if (!sw_dev)
+		return;
+
+	lan9303_shutdown(&sw_dev->chip);
 
-	return lan9303_remove(&sw_dev->chip);
+	i2c_set_clientdata(client, NULL);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -97,6 +112,7 @@ static struct i2c_driver lan9303_i2c_driver = {
 	},
 	.probe = lan9303_i2c_probe,
 	.remove = lan9303_i2c_remove,
+	.shutdown = lan9303_i2c_shutdown,
 	.id_table = lan9303_i2c_id,
 };
 module_i2c_driver(lan9303_i2c_driver);
diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c
index 9cbe80460b53..bbb7032409ba 100644
--- a/drivers/net/dsa/lan9303_mdio.c
+++ b/drivers/net/dsa/lan9303_mdio.c
@@ -138,6 +138,20 @@ static void lan9303_mdio_remove(struct mdio_device *mdiodev)
 		return;
 
 	lan9303_remove(&sw_dev->chip);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static void lan9303_mdio_shutdown(struct mdio_device *mdiodev)
+{
+	struct lan9303_mdio *sw_dev = dev_get_drvdata(&mdiodev->dev);
+
+	if (!sw_dev)
+		return;
+
+	lan9303_shutdown(&sw_dev->chip);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -155,6 +169,7 @@ static struct mdio_driver lan9303_mdio_driver = {
 	},
 	.probe  = lan9303_mdio_probe,
 	.remove = lan9303_mdio_remove,
+	.shutdown = lan9303_mdio_shutdown,
 };
 mdio_module_driver(lan9303_mdio_driver);
 
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 267324889dd6..3ff4b7e177f3 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -2184,6 +2184,9 @@ static int gswip_remove(struct platform_device *pdev)
 	struct gswip_priv *priv = platform_get_drvdata(pdev);
 	int i;
 
+	if (!priv)
+		return 0;
+
 	/* disable the switch */
 	gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB);
 
@@ -2197,9 +2200,23 @@ static int gswip_remove(struct platform_device *pdev)
 	for (i = 0; i < priv->num_gphy_fw; i++)
 		gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]);
 
+	platform_set_drvdata(pdev, NULL);
+
 	return 0;
 }
 
+static void gswip_shutdown(struct platform_device *pdev)
+{
+	struct gswip_priv *priv = platform_get_drvdata(pdev);
+
+	if (!priv)
+		return;
+
+	dsa_switch_shutdown(priv->ds);
+
+	platform_set_drvdata(pdev, NULL);
+}
+
 static const struct gswip_hw_info gswip_xrx200 = {
 	.max_ports = 7,
 	.cpu_port = 6,
@@ -2223,6 +2240,7 @@ MODULE_DEVICE_TABLE(of, gswip_of_match);
 static struct platform_driver gswip_driver = {
 	.probe = gswip_probe,
 	.remove = gswip_remove,
+	.shutdown = gswip_shutdown,
 	.driver = {
 		.name = "gswip",
 		.of_match_table = gswip_of_match,
diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c
index ea7550d1b634..866767b70d65 100644
--- a/drivers/net/dsa/microchip/ksz8795_spi.c
+++ b/drivers/net/dsa/microchip/ksz8795_spi.c
@@ -94,6 +94,8 @@ static int ksz8795_spi_remove(struct spi_device *spi)
 	if (dev)
 		ksz_switch_remove(dev);
 
+	spi_set_drvdata(spi, NULL);
+
 	return 0;
 }
 
@@ -101,8 +103,15 @@ static void ksz8795_spi_shutdown(struct spi_device *spi)
 {
 	struct ksz_device *dev = spi_get_drvdata(spi);
 
-	if (dev && dev->dev_ops->shutdown)
+	if (!dev)
+		return;
+
+	if (dev->dev_ops->shutdown)
 		dev->dev_ops->shutdown(dev);
+
+	dsa_switch_shutdown(dev->ds);
+
+	spi_set_drvdata(spi, NULL);
 }
 
 static const struct of_device_id ksz8795_dt_ids[] = {
diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c
index 4e053a25d077..f3afb8b8c4cc 100644
--- a/drivers/net/dsa/microchip/ksz9477_i2c.c
+++ b/drivers/net/dsa/microchip/ksz9477_i2c.c
@@ -56,7 +56,10 @@ static int ksz9477_i2c_remove(struct i2c_client *i2c)
 {
 	struct ksz_device *dev = i2c_get_clientdata(i2c);
 
-	ksz_switch_remove(dev);
+	if (dev)
+		ksz_switch_remove(dev);
+
+	i2c_set_clientdata(i2c, NULL);
 
 	return 0;
 }
@@ -65,8 +68,15 @@ static void ksz9477_i2c_shutdown(struct i2c_client *i2c)
 {
 	struct ksz_device *dev = i2c_get_clientdata(i2c);
 
-	if (dev && dev->dev_ops->shutdown)
+	if (!dev)
+		return;
+
+	if (dev->dev_ops->shutdown)
 		dev->dev_ops->shutdown(dev);
+
+	dsa_switch_shutdown(dev->ds);
+
+	i2c_set_clientdata(i2c, NULL);
 }
 
 static const struct i2c_device_id ksz9477_i2c_id[] = {
diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c
index 15bc11b3cda4..e3cb0e6c9f6f 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -72,6 +72,8 @@ static int ksz9477_spi_remove(struct spi_device *spi)
 	if (dev)
 		ksz_switch_remove(dev);
 
+	spi_set_drvdata(spi, NULL);
+
 	return 0;
 }
 
@@ -79,8 +81,10 @@ static void ksz9477_spi_shutdown(struct spi_device *spi)
 {
 	struct ksz_device *dev = spi_get_drvdata(spi);
 
-	if (dev && dev->dev_ops->shutdown)
-		dev->dev_ops->shutdown(dev);
+	if (dev)
+		dsa_switch_shutdown(dev->ds);
+
+	spi_set_drvdata(spi, NULL);
 }
 
 static const struct of_device_id ksz9477_dt_ids[] = {
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index d0cba2d1cd68..094737e5084a 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -3286,6 +3286,9 @@ mt7530_remove(struct mdio_device *mdiodev)
 	struct mt7530_priv *priv = dev_get_drvdata(&mdiodev->dev);
 	int ret = 0;
 
+	if (!priv)
+		return;
+
 	ret = regulator_disable(priv->core_pwr);
 	if (ret < 0)
 		dev_err(priv->dev,
@@ -3301,11 +3304,26 @@ mt7530_remove(struct mdio_device *mdiodev)
 
 	dsa_unregister_switch(priv->ds);
 	mutex_destroy(&priv->reg_mutex);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static void mt7530_shutdown(struct mdio_device *mdiodev)
+{
+	struct mt7530_priv *priv = dev_get_drvdata(&mdiodev->dev);
+
+	if (!priv)
+		return;
+
+	dsa_switch_shutdown(priv->ds);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
 }
 
 static struct mdio_driver mt7530_mdio_driver = {
 	.probe  = mt7530_probe,
 	.remove = mt7530_remove,
+	.shutdown = mt7530_shutdown,
 	.mdiodrv.driver = {
 		.name = "mt7530",
 		.of_match_table = mt7530_of_match,
diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index 24b8219fd607..a4c6eb9a52d0 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -290,7 +290,24 @@ static void mv88e6060_remove(struct mdio_device *mdiodev)
 {
 	struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
 
+	if (!ds)
+		return;
+
 	dsa_unregister_switch(ds);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static void mv88e6060_shutdown(struct mdio_device *mdiodev)
+{
+	struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
+
+	if (!ds)
+		return;
+
+	dsa_switch_shutdown(ds);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
 }
 
 static const struct of_device_id mv88e6060_of_match[] = {
@@ -303,6 +320,7 @@ static const struct of_device_id mv88e6060_of_match[] = {
 static struct mdio_driver mv88e6060_driver = {
 	.probe	= mv88e6060_probe,
 	.remove = mv88e6060_remove,
+	.shutdown = mv88e6060_shutdown,
 	.mdiodrv.driver = {
 		.name = "mv88e6060",
 		.of_match_table = mv88e6060_of_match,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index c45ca2473743..fb10422d2c33 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -6389,7 +6389,12 @@ out:
 static void mv88e6xxx_remove(struct mdio_device *mdiodev)
 {
 	struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
-	struct mv88e6xxx_chip *chip = ds->priv;
+	struct mv88e6xxx_chip *chip;
+
+	if (!ds)
+		return;
+
+	chip = ds->priv;
 
 	if (chip->info->ptp_support) {
 		mv88e6xxx_hwtstamp_free(chip);
@@ -6410,6 +6415,20 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
 		mv88e6xxx_g1_irq_free(chip);
 	else
 		mv88e6xxx_irq_poll_free(chip);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static void mv88e6xxx_shutdown(struct mdio_device *mdiodev)
+{
+	struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
+
+	if (!ds)
+		return;
+
+	dsa_switch_shutdown(ds);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
 }
 
 static const struct of_device_id mv88e6xxx_of_match[] = {
@@ -6433,6 +6452,7 @@ MODULE_DEVICE_TABLE(of, mv88e6xxx_of_match);
 static struct mdio_driver mv88e6xxx_driver = {
 	.probe	= mv88e6xxx_probe,
 	.remove = mv88e6xxx_remove,
+	.shutdown = mv88e6xxx_shutdown,
 	.mdiodrv.driver = {
 		.name = "mv88e6085",
 		.of_match_table = mv88e6xxx_of_match,
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 9e2ac8e46619..11b42fd812e4 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1472,9 +1472,10 @@ err_pci_enable:
 
 static void felix_pci_remove(struct pci_dev *pdev)
 {
-	struct felix *felix;
+	struct felix *felix = pci_get_drvdata(pdev);
 
-	felix = pci_get_drvdata(pdev);
+	if (!felix)
+		return;
 
 	dsa_unregister_switch(felix->ds);
 
@@ -1482,6 +1483,20 @@ static void felix_pci_remove(struct pci_dev *pdev)
 	kfree(felix);
 
 	pci_disable_device(pdev);
+
+	pci_set_drvdata(pdev, NULL);
+}
+
+static void felix_pci_shutdown(struct pci_dev *pdev)
+{
+	struct felix *felix = pci_get_drvdata(pdev);
+
+	if (!felix)
+		return;
+
+	dsa_switch_shutdown(felix->ds);
+
+	pci_set_drvdata(pdev, NULL);
 }
 
 static struct pci_device_id felix_ids[] = {
@@ -1498,6 +1513,7 @@ static struct pci_driver felix_vsc9959_pci_driver = {
 	.id_table	= felix_ids,
 	.probe		= felix_pci_probe,
 	.remove		= felix_pci_remove,
+	.shutdown	= felix_pci_shutdown,
 };
 module_pci_driver(felix_vsc9959_pci_driver);
 
diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c
index deae923c8b7a..de1d34a1f1e4 100644
--- a/drivers/net/dsa/ocelot/seville_vsc9953.c
+++ b/drivers/net/dsa/ocelot/seville_vsc9953.c
@@ -1245,18 +1245,33 @@ err_alloc_felix:
 
 static int seville_remove(struct platform_device *pdev)
 {
-	struct felix *felix;
+	struct felix *felix = platform_get_drvdata(pdev);
 
-	felix = platform_get_drvdata(pdev);
+	if (!felix)
+		return 0;
 
 	dsa_unregister_switch(felix->ds);
 
 	kfree(felix->ds);
 	kfree(felix);
 
+	platform_set_drvdata(pdev, NULL);
+
 	return 0;
 }
 
+static void seville_shutdown(struct platform_device *pdev)
+{
+	struct felix *felix = platform_get_drvdata(pdev);
+
+	if (!felix)
+		return;
+
+	dsa_switch_shutdown(felix->ds);
+
+	platform_set_drvdata(pdev, NULL);
+}
+
 static const struct of_device_id seville_of_match[] = {
 	{ .compatible = "mscc,vsc9953-switch" },
 	{ },
@@ -1266,6 +1281,7 @@ MODULE_DEVICE_TABLE(of, seville_of_match);
 static struct platform_driver seville_vsc9953_driver = {
 	.probe		= seville_probe,
 	.remove		= seville_remove,
+	.shutdown	= seville_shutdown,
 	.driver = {
 		.name		= "mscc_seville",
 		.of_match_table	= of_match_ptr(seville_of_match),
diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c
index 563d8a279030..a6bfb6abc51a 100644
--- a/drivers/net/dsa/qca/ar9331.c
+++ b/drivers/net/dsa/qca/ar9331.c
@@ -1083,6 +1083,9 @@ static void ar9331_sw_remove(struct mdio_device *mdiodev)
 	struct ar9331_sw_priv *priv = dev_get_drvdata(&mdiodev->dev);
 	unsigned int i;
 
+	if (!priv)
+		return;
+
 	for (i = 0; i < ARRAY_SIZE(priv->port); i++) {
 		struct ar9331_sw_port *port = &priv->port[i];
 
@@ -1094,6 +1097,20 @@ static void ar9331_sw_remove(struct mdio_device *mdiodev)
 	dsa_unregister_switch(&priv->ds);
 
 	reset_control_assert(priv->sw_reset);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static void ar9331_sw_shutdown(struct mdio_device *mdiodev)
+{
+	struct ar9331_sw_priv *priv = dev_get_drvdata(&mdiodev->dev);
+
+	if (!priv)
+		return;
+
+	dsa_switch_shutdown(&priv->ds);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
 }
 
 static const struct of_device_id ar9331_sw_of_match[] = {
@@ -1104,6 +1121,7 @@ static const struct of_device_id ar9331_sw_of_match[] = {
 static struct mdio_driver ar9331_sw_mdio_driver = {
 	.probe = ar9331_sw_probe,
 	.remove = ar9331_sw_remove,
+	.shutdown = ar9331_sw_shutdown,
 	.mdiodrv.driver = {
 		.name = AR9331_SW_NAME,
 		.of_match_table = ar9331_sw_of_match,
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index bda5a9bf4f52..a984f06f6f04 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -1880,10 +1880,27 @@ qca8k_sw_remove(struct mdio_device *mdiodev)
 	struct qca8k_priv *priv = dev_get_drvdata(&mdiodev->dev);
 	int i;
 
+	if (!priv)
+		return;
+
 	for (i = 0; i < QCA8K_NUM_PORTS; i++)
 		qca8k_port_set_status(priv, i, 0);
 
 	dsa_unregister_switch(priv->ds);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static void qca8k_sw_shutdown(struct mdio_device *mdiodev)
+{
+	struct qca8k_priv *priv = dev_get_drvdata(&mdiodev->dev);
+
+	if (!priv)
+		return;
+
+	dsa_switch_shutdown(priv->ds);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -1940,6 +1957,7 @@ static const struct of_device_id qca8k_of_match[] = {
 static struct mdio_driver qca8kmdio_driver = {
 	.probe  = qca8k_sw_probe,
 	.remove = qca8k_sw_remove,
+	.shutdown = qca8k_sw_shutdown,
 	.mdiodrv.driver = {
 		.name = "qca8k",
 		.of_match_table = qca8k_of_match,
diff --git a/drivers/net/dsa/realtek-smi-core.c b/drivers/net/dsa/realtek-smi-core.c
index 8e49d4f85d48..dd2f0d6208b3 100644
--- a/drivers/net/dsa/realtek-smi-core.c
+++ b/drivers/net/dsa/realtek-smi-core.c
@@ -464,16 +464,33 @@ static int realtek_smi_probe(struct platform_device *pdev)
 
 static int realtek_smi_remove(struct platform_device *pdev)
 {
-	struct realtek_smi *smi = dev_get_drvdata(&pdev->dev);
+	struct realtek_smi *smi = platform_get_drvdata(pdev);
+
+	if (!smi)
+		return 0;
 
 	dsa_unregister_switch(smi->ds);
 	if (smi->slave_mii_bus)
 		of_node_put(smi->slave_mii_bus->dev.of_node);
 	gpiod_set_value(smi->reset, 1);
 
+	platform_set_drvdata(pdev, NULL);
+
 	return 0;
 }
 
+static void realtek_smi_shutdown(struct platform_device *pdev)
+{
+	struct realtek_smi *smi = platform_get_drvdata(pdev);
+
+	if (!smi)
+		return;
+
+	dsa_switch_shutdown(smi->ds);
+
+	platform_set_drvdata(pdev, NULL);
+}
+
 static const struct of_device_id realtek_smi_of_match[] = {
 	{
 		.compatible = "realtek,rtl8366rb",
@@ -495,6 +512,7 @@ static struct platform_driver realtek_smi_driver = {
 	},
 	.probe  = realtek_smi_probe,
 	.remove = realtek_smi_remove,
+	.shutdown = realtek_smi_shutdown,
 };
 module_platform_driver(realtek_smi_driver);
 
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 2f8cc6686c38..7c0db80eff00 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3335,13 +3335,29 @@ static int sja1105_probe(struct spi_device *spi)
 static int sja1105_remove(struct spi_device *spi)
 {
 	struct sja1105_private *priv = spi_get_drvdata(spi);
-	struct dsa_switch *ds = priv->ds;
 
-	dsa_unregister_switch(ds);
+	if (!priv)
+		return 0;
+
+	dsa_unregister_switch(priv->ds);
+
+	spi_set_drvdata(spi, NULL);
 
 	return 0;
 }
 
+static void sja1105_shutdown(struct spi_device *spi)
+{
+	struct sja1105_private *priv = spi_get_drvdata(spi);
+
+	if (!priv)
+		return;
+
+	dsa_switch_shutdown(priv->ds);
+
+	spi_set_drvdata(spi, NULL);
+}
+
 static const struct of_device_id sja1105_dt_ids[] = {
 	{ .compatible = "nxp,sja1105e", .data = &sja1105e_info },
 	{ .compatible = "nxp,sja1105t", .data = &sja1105t_info },
@@ -3365,6 +3381,7 @@ static struct spi_driver sja1105_driver = {
 	},
 	.probe  = sja1105_probe,
 	.remove = sja1105_remove,
+	.shutdown = sja1105_shutdown,
 };
 
 module_spi_driver(sja1105_driver);
diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c
index 19ce4aa0973b..a4b1447ff055 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-core.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-core.c
@@ -1225,6 +1225,12 @@ int vsc73xx_remove(struct vsc73xx *vsc)
 }
 EXPORT_SYMBOL(vsc73xx_remove);
 
+void vsc73xx_shutdown(struct vsc73xx *vsc)
+{
+	dsa_switch_shutdown(vsc->ds);
+}
+EXPORT_SYMBOL(vsc73xx_shutdown);
+
 MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
 MODULE_DESCRIPTION("Vitesse VSC7385/7388/7395/7398 driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/dsa/vitesse-vsc73xx-platform.c b/drivers/net/dsa/vitesse-vsc73xx-platform.c
index 2a57f337b2a2..fe4b154a0a57 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-platform.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-platform.c
@@ -116,7 +116,26 @@ static int vsc73xx_platform_remove(struct platform_device *pdev)
 {
 	struct vsc73xx_platform *vsc_platform = platform_get_drvdata(pdev);
 
-	return vsc73xx_remove(&vsc_platform->vsc);
+	if (!vsc_platform)
+		return 0;
+
+	vsc73xx_remove(&vsc_platform->vsc);
+
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static void vsc73xx_platform_shutdown(struct platform_device *pdev)
+{
+	struct vsc73xx_platform *vsc_platform = platform_get_drvdata(pdev);
+
+	if (!vsc_platform)
+		return;
+
+	vsc73xx_shutdown(&vsc_platform->vsc);
+
+	platform_set_drvdata(pdev, NULL);
 }
 
 static const struct vsc73xx_ops vsc73xx_platform_ops = {
@@ -144,6 +163,7 @@ MODULE_DEVICE_TABLE(of, vsc73xx_of_match);
 static struct platform_driver vsc73xx_platform_driver = {
 	.probe = vsc73xx_platform_probe,
 	.remove = vsc73xx_platform_remove,
+	.shutdown = vsc73xx_platform_shutdown,
 	.driver = {
 		.name = "vsc73xx-platform",
 		.of_match_table = vsc73xx_of_match,
diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c
index 81eca4a5781d..645398901e05 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-spi.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c
@@ -163,7 +163,26 @@ static int vsc73xx_spi_remove(struct spi_device *spi)
 {
 	struct vsc73xx_spi *vsc_spi = spi_get_drvdata(spi);
 
-	return vsc73xx_remove(&vsc_spi->vsc);
+	if (!vsc_spi)
+		return 0;
+
+	vsc73xx_remove(&vsc_spi->vsc);
+
+	spi_set_drvdata(spi, NULL);
+
+	return 0;
+}
+
+static void vsc73xx_spi_shutdown(struct spi_device *spi)
+{
+	struct vsc73xx_spi *vsc_spi = spi_get_drvdata(spi);
+
+	if (!vsc_spi)
+		return;
+
+	vsc73xx_shutdown(&vsc_spi->vsc);
+
+	spi_set_drvdata(spi, NULL);
 }
 
 static const struct vsc73xx_ops vsc73xx_spi_ops = {
@@ -191,6 +210,7 @@ MODULE_DEVICE_TABLE(of, vsc73xx_of_match);
 static struct spi_driver vsc73xx_spi_driver = {
 	.probe = vsc73xx_spi_probe,
 	.remove = vsc73xx_spi_remove,
+	.shutdown = vsc73xx_spi_shutdown,
 	.driver = {
 		.name = "vsc73xx-spi",
 		.of_match_table = vsc73xx_of_match,
diff --git a/drivers/net/dsa/vitesse-vsc73xx.h b/drivers/net/dsa/vitesse-vsc73xx.h
index 7478f8d4e0a9..30b951504e65 100644
--- a/drivers/net/dsa/vitesse-vsc73xx.h
+++ b/drivers/net/dsa/vitesse-vsc73xx.h
@@ -27,3 +27,4 @@ struct vsc73xx_ops {
 int vsc73xx_is_addr_valid(u8 block, u8 subblock);
 int vsc73xx_probe(struct vsc73xx *vsc);
 int vsc73xx_remove(struct vsc73xx *vsc);
+void vsc73xx_shutdown(struct vsc73xx *vsc);
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 258867eff230..6e29c0e080f6 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -1046,6 +1046,7 @@ static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr,
 
 void dsa_unregister_switch(struct dsa_switch *ds);
 int dsa_register_switch(struct dsa_switch *ds);
+void dsa_switch_shutdown(struct dsa_switch *ds);
 struct dsa_switch *dsa_switch_find(int tree_index, int sw_index);
 #ifdef CONFIG_PM_SLEEP
 int dsa_switch_suspend(struct dsa_switch *ds);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index eef13cd20f19..fa88e58705f0 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -1562,3 +1562,53 @@ void dsa_unregister_switch(struct dsa_switch *ds)
 	mutex_unlock(&dsa2_mutex);
 }
 EXPORT_SYMBOL_GPL(dsa_unregister_switch);
+
+/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is
+ * blocking that operation from completion, due to the dev_hold taken inside
+ * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of
+ * the DSA master, so that the system can reboot successfully.
+ */
+void dsa_switch_shutdown(struct dsa_switch *ds)
+{
+	struct net_device *master, *slave_dev;
+	LIST_HEAD(unregister_list);
+	struct dsa_port *dp;
+
+	mutex_lock(&dsa2_mutex);
+	rtnl_lock();
+
+	list_for_each_entry(dp, &ds->dst->ports, list) {
+		if (dp->ds != ds)
+			continue;
+
+		if (!dsa_port_is_user(dp))
+			continue;
+
+		master = dp->cpu_dp->master;
+		slave_dev = dp->slave;
+
+		netdev_upper_dev_unlink(master, slave_dev);
+		/* Just unlinking ourselves as uppers of the master is not
+		 * sufficient. When the master net device unregisters, that will
+		 * also call dev_close, which we will catch as NETDEV_GOING_DOWN
+		 * and trigger a dev_close on our own devices (dsa_slave_close).
+		 * In turn, that will call dev_mc_unsync on the master's net
+		 * device. If the master is also a DSA switch port, this will
+		 * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on
+		 * its own master. Lockdep will complain about the fact that
+		 * all cascaded masters have the same dsa_master_addr_list_lock_key,
+		 * which it normally would not do if the cascaded masters would
+		 * be in a proper upper/lower relationship, which we've just
+		 * destroyed.
+		 * To suppress the lockdep warnings, let's actually unregister
+		 * the DSA slave interfaces too, to avoid the nonsensical
+		 * multicast address list synchronization on shutdown.
+		 */
+		unregister_netdevice_queue(slave_dev, &unregister_list);
+	}
+	unregister_netdevice_many(&unregister_list);
+
+	rtnl_unlock();
+	mutex_unlock(&dsa2_mutex);
+}
+EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
-- 
cgit v1.2.3


From 46baae56e1001a771a5d132aa883cb5605013ae2 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 17 Sep 2021 16:34:34 +0300
Subject: net: dsa: hellcreek: be compatible with masters which unregister on
 shutdown

Since commit 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA
master to get rid of lockdep warnings"), DSA gained a requirement which
it did not fulfill, which is to unlink itself from the DSA master at
shutdown time.

Since the hellcreek driver was introduced after the bad commit, it has
never worked with DSA masters which decide to unregister their
net_device on shutdown, effectively hanging the reboot process.

Hellcreek is a platform device driver, so we probably cannot have the
oddity of ->shutdown and ->remove both getting called for the exact
same struct device. But to be in line with the pattern from the other
device drivers which are on slow buses, implement the same "if this then
not that" pattern of either running the ->shutdown or the ->remove hook.
The driver's current ->remove implementation makes that very easy
because it already zeroes out its device_drvdata on ->remove.

Fixes: e4b27ebc780f ("net: dsa: Add DSA driver for Hirschmann Hellcreek switches")
Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/
Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Kurt Kanzenbach <kurt@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/hirschmann/hellcreek.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 542cfc4ccb08..354655f9ed00 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -1916,6 +1916,9 @@ static int hellcreek_remove(struct platform_device *pdev)
 {
 	struct hellcreek *hellcreek = platform_get_drvdata(pdev);
 
+	if (!hellcreek)
+		return 0;
+
 	hellcreek_hwtstamp_free(hellcreek);
 	hellcreek_ptp_free(hellcreek);
 	dsa_unregister_switch(hellcreek->ds);
@@ -1924,6 +1927,18 @@ static int hellcreek_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static void hellcreek_shutdown(struct platform_device *pdev)
+{
+	struct hellcreek *hellcreek = platform_get_drvdata(pdev);
+
+	if (!hellcreek)
+		return;
+
+	dsa_switch_shutdown(hellcreek->ds);
+
+	platform_set_drvdata(pdev, NULL);
+}
+
 static const struct hellcreek_platform_data de1soc_r1_pdata = {
 	.name		 = "r4c30",
 	.num_ports	 = 4,
@@ -1946,6 +1961,7 @@ MODULE_DEVICE_TABLE(of, hellcreek_of_match);
 static struct platform_driver hellcreek_driver = {
 	.probe	= hellcreek_probe,
 	.remove = hellcreek_remove,
+	.shutdown = hellcreek_shutdown,
 	.driver = {
 		.name = "hellcreek",
 		.of_match_table = hellcreek_of_match,
-- 
cgit v1.2.3


From fe4053078cd0f02a3fa140c43660f327702a9f10 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 17 Sep 2021 16:34:35 +0300
Subject: net: dsa: microchip: ksz8863: be compatible with masters which
 unregister on shutdown

Since commit 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA
master to get rid of lockdep warnings"), DSA gained a requirement which
it did not fulfill, which is to unlink itself from the DSA master at
shutdown time.

Since the Microchip sub-driver for KSZ8863 was introduced after the bad
commit, it has never worked with DSA masters which decide to unregister
their net_device on shutdown, effectively hanging the reboot process.
To fix that, we need to call dsa_switch_shutdown.

Since this driver expects the MDIO bus to be backed by mdio_bitbang, I
don't think there is currently any MDIO bus driver which implements its
->shutdown by redirecting it to ->remove. In any case, to be compatible
with that pattern, it is necessary to implement an "if this then not
that" scheme, to prevent ->remove and ->shutdown from both being called
for the same struct device.

Fixes: 60a364760002 ("net: dsa: microchip: Add Microchip KSZ8863 SMI based driver support")
Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/
Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz8863_smi.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/dsa/microchip/ksz8863_smi.c b/drivers/net/dsa/microchip/ksz8863_smi.c
index 11293485138c..5883fa7edda2 100644
--- a/drivers/net/dsa/microchip/ksz8863_smi.c
+++ b/drivers/net/dsa/microchip/ksz8863_smi.c
@@ -191,6 +191,18 @@ static void ksz8863_smi_remove(struct mdio_device *mdiodev)
 
 	if (dev)
 		ksz_switch_remove(dev);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static void ksz8863_smi_shutdown(struct mdio_device *mdiodev)
+{
+	struct ksz_device *dev = dev_get_drvdata(&mdiodev->dev);
+
+	if (dev)
+		dsa_switch_shutdown(dev->ds);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
 }
 
 static const struct of_device_id ksz8863_dt_ids[] = {
@@ -203,6 +215,7 @@ MODULE_DEVICE_TABLE(of, ksz8863_dt_ids);
 static struct mdio_driver ksz8863_driver = {
 	.probe	= ksz8863_smi_probe,
 	.remove	= ksz8863_smi_remove,
+	.shutdown = ksz8863_smi_shutdown,
 	.mdiodrv.driver = {
 		.name	= "ksz8863-switch",
 		.of_match_table = ksz8863_dt_ids,
-- 
cgit v1.2.3


From a68e9da48568a0adf5dc817ef81971c0d1aa0672 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 17 Sep 2021 16:34:36 +0300
Subject: net: dsa: xrs700x: be compatible with masters which unregister on
 shutdown

Since commit 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA
master to get rid of lockdep warnings"), DSA gained a requirement which
it did not fulfill, which is to unlink itself from the DSA master at
shutdown time.

Since the Arrow SpeedChips XRS700x driver was introduced after the bad
commit, it has never worked with DSA masters which decide to unregister
their net_device on shutdown, effectively hanging the reboot process.
To fix that, we need to call dsa_switch_shutdown.

These devices can be connected by I2C or by MDIO. Searching for I2C or
MDIO bus drivers that implement their ->shutdown by redirecting it to
->remove, I don't see any; however, this does not mean it would not be
possible. To be compatible with that pattern, it is necessary to
implement an "if this then not that" scheme, to prevent ->remove and
->shutdown from both being called for the same struct device.

Fixes: ee00b24f32eb ("net: dsa: add Arrow SpeedChips XRS700x driver")
Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/
Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: George McCollister <george.mccollister@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/xrs700x/xrs700x.c      |  6 ++++++
 drivers/net/dsa/xrs700x/xrs700x.h      |  1 +
 drivers/net/dsa/xrs700x/xrs700x_i2c.c  | 18 ++++++++++++++++++
 drivers/net/dsa/xrs700x/xrs700x_mdio.c | 18 ++++++++++++++++++
 4 files changed, 43 insertions(+)

diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c
index 130abb0f1438..469420941054 100644
--- a/drivers/net/dsa/xrs700x/xrs700x.c
+++ b/drivers/net/dsa/xrs700x/xrs700x.c
@@ -822,6 +822,12 @@ void xrs700x_switch_remove(struct xrs700x *priv)
 }
 EXPORT_SYMBOL(xrs700x_switch_remove);
 
+void xrs700x_switch_shutdown(struct xrs700x *priv)
+{
+	dsa_switch_shutdown(priv->ds);
+}
+EXPORT_SYMBOL(xrs700x_switch_shutdown);
+
 MODULE_AUTHOR("George McCollister <george.mccollister@gmail.com>");
 MODULE_DESCRIPTION("Arrow SpeedChips XRS700x DSA driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/dsa/xrs700x/xrs700x.h b/drivers/net/dsa/xrs700x/xrs700x.h
index ff62cf61b091..4d58257471d2 100644
--- a/drivers/net/dsa/xrs700x/xrs700x.h
+++ b/drivers/net/dsa/xrs700x/xrs700x.h
@@ -40,3 +40,4 @@ struct xrs700x {
 struct xrs700x *xrs700x_switch_alloc(struct device *base, void *devpriv);
 int xrs700x_switch_register(struct xrs700x *priv);
 void xrs700x_switch_remove(struct xrs700x *priv);
+void xrs700x_switch_shutdown(struct xrs700x *priv);
diff --git a/drivers/net/dsa/xrs700x/xrs700x_i2c.c b/drivers/net/dsa/xrs700x/xrs700x_i2c.c
index 489d9385b4f0..6deae388a0d6 100644
--- a/drivers/net/dsa/xrs700x/xrs700x_i2c.c
+++ b/drivers/net/dsa/xrs700x/xrs700x_i2c.c
@@ -109,11 +109,28 @@ static int xrs700x_i2c_remove(struct i2c_client *i2c)
 {
 	struct xrs700x *priv = i2c_get_clientdata(i2c);
 
+	if (!priv)
+		return 0;
+
 	xrs700x_switch_remove(priv);
 
+	i2c_set_clientdata(i2c, NULL);
+
 	return 0;
 }
 
+static void xrs700x_i2c_shutdown(struct i2c_client *i2c)
+{
+	struct xrs700x *priv = i2c_get_clientdata(i2c);
+
+	if (!priv)
+		return;
+
+	xrs700x_switch_shutdown(priv);
+
+	i2c_set_clientdata(i2c, NULL);
+}
+
 static const struct i2c_device_id xrs700x_i2c_id[] = {
 	{ "xrs700x-switch", 0 },
 	{},
@@ -137,6 +154,7 @@ static struct i2c_driver xrs700x_i2c_driver = {
 	},
 	.probe	= xrs700x_i2c_probe,
 	.remove	= xrs700x_i2c_remove,
+	.shutdown = xrs700x_i2c_shutdown,
 	.id_table = xrs700x_i2c_id,
 };
 
diff --git a/drivers/net/dsa/xrs700x/xrs700x_mdio.c b/drivers/net/dsa/xrs700x/xrs700x_mdio.c
index 44f58bee04a4..d01cf1073d49 100644
--- a/drivers/net/dsa/xrs700x/xrs700x_mdio.c
+++ b/drivers/net/dsa/xrs700x/xrs700x_mdio.c
@@ -136,7 +136,24 @@ static void xrs700x_mdio_remove(struct mdio_device *mdiodev)
 {
 	struct xrs700x *priv = dev_get_drvdata(&mdiodev->dev);
 
+	if (!priv)
+		return;
+
 	xrs700x_switch_remove(priv);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static void xrs700x_mdio_shutdown(struct mdio_device *mdiodev)
+{
+	struct xrs700x *priv = dev_get_drvdata(&mdiodev->dev);
+
+	if (!priv)
+		return;
+
+	xrs700x_switch_shutdown(priv);
+
+	dev_set_drvdata(&mdiodev->dev, NULL);
 }
 
 static const struct of_device_id __maybe_unused xrs700x_mdio_dt_ids[] = {
@@ -155,6 +172,7 @@ static struct mdio_driver xrs700x_mdio_driver = {
 	},
 	.probe	= xrs700x_mdio_probe,
 	.remove	= xrs700x_mdio_remove,
+	.shutdown = xrs700x_mdio_shutdown,
 };
 
 mdio_module_driver(xrs700x_mdio_driver);
-- 
cgit v1.2.3


From 3ede7f84c7c21f93c5eac611d60eba3f2c765e0f Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Fri, 17 Sep 2021 08:27:10 +0200
Subject: xen-netback: correct success/error reporting for the
 SKB-with-fraglist case

When re-entering the main loop of xenvif_tx_check_gop() a 2nd time, the
special considerations for the head of the SKB no longer apply. Don't
mistakenly report ERROR to the frontend for the first entry in the list,
even if - from all I can tell - this shouldn't matter much as the overall
transmit will need to be considered failed anyway.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul@xen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 39a01c2a3058..32d5bc4919d8 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -499,7 +499,7 @@ check_frags:
 				 * the header's copy failed, and they are
 				 * sharing a slot, send an error
 				 */
-				if (i == 0 && sharedslot)
+				if (i == 0 && !first_shinfo && sharedslot)
 					xenvif_idx_release(queue, pending_idx,
 							   XEN_NETIF_RSP_ERROR);
 				else
-- 
cgit v1.2.3


From afd92d82c9d715fb97565408755acad81573591a Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 17 Sep 2021 16:34:06 +0800
Subject: virtio-net: fix pages leaking when building skb in big mode

We try to use build_skb() if we have sufficient tailroom, but we forget
to release the unused pages chained via page->private in big mode, which
leaks those pages. Fix this by releasing the pages after building the skb
in big mode.

Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Fixes: fb32856b16ad ("virtio-net: page_to_skb() use build_skb when there's sufficient tailroom")
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 271d38c1d9f8..79bd2585ec6b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -423,6 +423,10 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 
 		skb_reserve(skb, p - buf);
 		skb_put(skb, len);
+
+		page = (struct page *)page->private;
+		if (page)
+			give_pages(rq, page);
 		goto ok;
 	}
 
-- 
cgit v1.2.3


From 7237a494decfa17d0b9d0076e6cee3235719de90 Mon Sep 17 00:00:00 2001
From: Claudiu Manoil <claudiu.manoil@nxp.com>
Date: Fri, 17 Sep 2021 13:22:05 +0300
Subject: enetc: Fix illegal access when reading affinity_hint

irq_set_affinity_hint() stores a reference to the cpumask_t
parameter in the irq descriptor, and that reference can be
accessed later from irq_affinity_hint_proc_show(). Since
the cpu_mask parameter passed to irq_set_affinity_hint() has
only temporary storage (it lives on the stack), later
accesses to it are illegal. Thus reads from the corresponding
procfs affinity_hint file can result in a paging request oops.

The issue is fixed by the get_cpu_mask() helper, which provides
a permanent storage for the cpumask_t parameter.

Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers")
Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 3ca93adb9662..7f90c27c0e79 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1879,7 +1879,6 @@ static void enetc_clear_bdrs(struct enetc_ndev_priv *priv)
 static int enetc_setup_irqs(struct enetc_ndev_priv *priv)
 {
 	struct pci_dev *pdev = priv->si->pdev;
-	cpumask_t cpu_mask;
 	int i, j, err;
 
 	for (i = 0; i < priv->bdr_int_num; i++) {
@@ -1908,9 +1907,7 @@ static int enetc_setup_irqs(struct enetc_ndev_priv *priv)
 
 			enetc_wr(hw, ENETC_SIMSITRV(idx), entry);
 		}
-		cpumask_clear(&cpu_mask);
-		cpumask_set_cpu(i % num_online_cpus(), &cpu_mask);
-		irq_set_affinity_hint(irq, &cpu_mask);
+		irq_set_affinity_hint(irq, get_cpu_mask(i % num_online_cpus()));
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 9f7afa05c9522b086327929ae622facab0f0f72b Mon Sep 17 00:00:00 2001
From: Claudiu Manoil <claudiu.manoil@nxp.com>
Date: Fri, 17 Sep 2021 13:22:06 +0300
Subject: enetc: Fix uninitialized struct dim_sample field usage

The only struct dim_sample member that does not get
initialized by dim_update_sample() is comp_ctr. (There
is special API to initialize comp_ctr:
dim_update_sample_with_comps(), and it is currently used
only for RDMA.) comp_ctr is used to compute curr_stats->cmps
and curr_stats->cpe_ratio (see dim_calc_stats()) which in
turn are consumed by the rdma_dim_*() API.  Therefore,
functionally, the net_dim*() API consumers are not affected.
Nevertheless, fix the computation of statistics based
on an uninitialized variable, even if the mentioned statistics
are not used at the moment.

Fixes: ae0e6a5d1627 ("enetc: Add adaptive interrupt coalescing")
Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 7f90c27c0e79..042327b9981f 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -419,7 +419,7 @@ static void enetc_rx_dim_work(struct work_struct *w)
 
 static void enetc_rx_net_dim(struct enetc_int_vector *v)
 {
-	struct dim_sample dim_sample;
+	struct dim_sample dim_sample = {};
 
 	v->comp_cnt++;
 
-- 
cgit v1.2.3


From 87758511075ec961486fe78d7548dd709b524433 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 17 Sep 2021 14:05:47 -0700
Subject: igc: fix build errors for PTP

When IGC=y and PTP_1588_CLOCK=m, the ptp_*() interface family is
not available to the igc driver. Make this driver depend on
PTP_1588_CLOCK_OPTIONAL so that it will build without errors.

Various igc commits have used ptp_*() functions without checking
that PTP_1588_CLOCK is enabled. Fix all of these here.

Fixes these build errors:

ld: drivers/net/ethernet/intel/igc/igc_main.o: in function `igc_msix_other':
igc_main.c:(.text+0x6494): undefined reference to `ptp_clock_event'
ld: igc_main.c:(.text+0x64ef): undefined reference to `ptp_clock_event'
ld: igc_main.c:(.text+0x6559): undefined reference to `ptp_clock_event'
ld: drivers/net/ethernet/intel/igc/igc_ethtool.o: in function `igc_ethtool_get_ts_info':
igc_ethtool.c:(.text+0xc7a): undefined reference to `ptp_clock_index'
ld: drivers/net/ethernet/intel/igc/igc_ptp.o: in function `igc_ptp_feature_enable_i225':
igc_ptp.c:(.text+0x330): undefined reference to `ptp_find_pin'
ld: igc_ptp.c:(.text+0x36f): undefined reference to `ptp_find_pin'
ld: drivers/net/ethernet/intel/igc/igc_ptp.o: in function `igc_ptp_init':
igc_ptp.c:(.text+0x11cd): undefined reference to `ptp_clock_register'
ld: drivers/net/ethernet/intel/igc/igc_ptp.o: in function `igc_ptp_stop':
igc_ptp.c:(.text+0x12dd): undefined reference to `ptp_clock_unregister'
ld: drivers/platform/x86/dell/dell-wmi-privacy.o: in function `dell_privacy_wmi_probe':

Fixes: 64433e5bf40ab ("igc: Enable internal i225 PPS")
Fixes: 60dbede0c4f3d ("igc: Add support for ethtool GET_TS_INFO command")
Fixes: 87938851b6efb ("igc: enable auxiliary PHC functions for the i225")
Fixes: 5f2958052c582 ("igc: Add basic skeleton for PTP")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Ederson de Souza <ederson.desouza@intel.com>
Cc: Tony Nguyen <anthony.l.nguyen@intel.com>
Cc: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
Cc: intel-wired-lan@lists.osuosl.org
Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index b0b6f90deb7d..ed8ea63bb172 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -335,6 +335,7 @@ config IGC
 	tristate "Intel(R) Ethernet Controller I225-LM/I225-V support"
 	default n
 	depends on PCI
+	depends on PTP_1588_CLOCK_OPTIONAL
 	help
 	  This driver supports Intel(R) Ethernet Controller I225-LM/I225-V
 	  family of adapters.
-- 
cgit v1.2.3


From 48e6d083b3aa006052db687fb26eeceef1d325b6 Mon Sep 17 00:00:00 2001
From: Alejandro Concepcion-Rodriguez <asconcepcion@acoro.eu>
Date: Fri, 17 Sep 2021 18:49:59 +0000
Subject: docs: net: dsa: sja1105: fix reference to sja1105.txt

The file sja1105.txt was converted to nxp,sja1105.yaml.

Signed-off-by: Alejandro Concepcion-Rodriguez <asconcepcion@acoro.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/sja1105.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst
index 564caeebe2b2..29b1bae0cf00 100644
--- a/Documentation/networking/dsa/sja1105.rst
+++ b/Documentation/networking/dsa/sja1105.rst
@@ -296,7 +296,7 @@ not available.
 Device Tree bindings and board design
 =====================================
 
-This section references ``Documentation/devicetree/bindings/net/dsa/sja1105.txt``
+This section references ``Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml``
 and aims to showcase some potential switch caveats.
 
 RMII PHY role and out-of-band signaling
-- 
cgit v1.2.3


From 2dcb96bacce36021c2f3eaae0cef607b5bb71ede Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 18 Sep 2021 14:42:35 +0200
Subject: net: core: Correct the sock::sk_lock.owned lockdep annotations

lock_sock_fast() and lock_sock_nested() contain lockdep annotations for the
sock::sk_lock.owned 'mutex'. sock::sk_lock.owned is not a regular mutex. It
is just lockdep wise equivalent. In fact it's an open coded trivial mutex
implementation with some interesting features.

sock::sk_lock.slock is a regular spinlock protecting the 'mutex'
representation sock::sk_lock.owned which is a plain boolean. If 'owned' is
true, then some other task holds the 'mutex', otherwise it is uncontended.
As this locking construct is obviously endangered by lock ordering issues as
any other locking primitive it got lockdep annotated via a dedicated
dependency map sock::sk_lock.dep_map which has to be updated at the lock
and unlock sites.

lock_sock_nested() is a straight forward 'mutex' lock operation:

  might_sleep();
  spin_lock_bh(sock::sk_lock.slock)
  while (!try_lock(sock::sk_lock.owned)) {
      spin_unlock_bh(sock::sk_lock.slock);
      wait_for_release();
      spin_lock_bh(sock::sk_lock.slock);
  }

The lockdep annotation for sock::sk_lock.owned is for unknown reasons
_after_ the lock has been acquired, i.e. after the code block above and
after releasing sock::sk_lock.slock, but inside the bottom halves disabled
region:

  spin_unlock(sock::sk_lock.slock);
  mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
  local_bh_enable();

The placement after the unlock is obvious because otherwise the
mutex_acquire() would nest into the spin lock held region.

But that's from the lockdep perspective still the wrong place:

 1) The mutex_acquire() is issued _after_ the successful acquisition which
    is pointless because in a dead lock scenario this point is never
    reached which means that if the deadlock is the first instance of
    exposing the wrong lock order lockdep does not have a chance to detect
    it.

 2) It only works because lockdep is rather lax on the context from which
    the mutex_acquire() is issued. Acquiring a mutex inside a bottom halves
    and therefore non-preemptible region is obviously invalid, except for a
    trylock which is clearly not the case here.

    This 'works' stops working on RT enabled kernels where the bottom halves
    serialization is done via a local lock, which exposes this misplacement
    because the 'mutex' and the local lock nest the wrong way around and
    lockdep complains rightfully about a lock inversion.

The placement is wrong since the initial commit a5b5bb9a053a ("[PATCH]
lockdep: annotate sk_locks") which introduced this.

Fix it by moving the mutex_acquire() in front of the actual lock
acquisition, which is what the regular mutex_lock() operation does as well.

lock_sock_fast() is not that straightforward. At first glance it looks
like a convoluted trylock operation:

  spin_lock_bh(sock::sk_lock.slock)
  if (!sock::sk_lock.owned)
      return false;
  while (!try_lock(sock::sk_lock.owned)) {
      spin_unlock_bh(sock::sk_lock.slock);
      wait_for_release();
      spin_lock_bh(sock::sk_lock.slock);
  }
  spin_unlock(sock::sk_lock.slock);
  mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
  local_bh_enable();
  return true;

But that's not the case: lock_sock_fast() is an interesting optimization
for short critical sections which can run with bottom halves disabled and
sock::sk_lock.slock held. This allows shortcutting the 'mutex' operation in
the non-contended case by preventing other lockers from acquiring
sock::sk_lock.owned because they are blocked on sock::sk_lock.slock, which
in turn avoids the overhead of doing the heavy processing in release_sock()
including waking up wait queue waiters.

In the contended case, i.e. when sock::sk_lock.owned == true the behavior
is the same as lock_sock_nested().

Semantically this shortcut means, that the task acquired the 'mutex' even
if it does not touch the sock::sk_lock.owned field in the non-contended
case. Not telling lockdep about this shortcut acquisition is hiding
potential lock ordering violations in the fast path.

As a consequence the same reasoning as for the above lock_sock_nested()
case vs. the placement of the lockdep annotation applies.

The current placement of the lockdep annotation was just copied from
the original lock_sock(), now renamed to lock_sock_nested(),
implementation.

Fix this by moving the mutex_acquire() in front of the actual lock
acquisition and adding the corresponding mutex_release() into
unlock_sock_fast(). Also document the fast path return case with a comment.

Reported-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: netdev@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  1 +
 net/core/sock.c    | 37 +++++++++++++++++++++++--------------
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 66a9a90f9558..c005c3c750e8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1640,6 +1640,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)
 		release_sock(sk);
 		__release(&sk->sk_lock.slock);
 	} else {
+		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
 		spin_unlock_bh(&sk->sk_lock.slock);
 	}
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 62627e868e03..512e629f9780 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3179,17 +3179,15 @@ EXPORT_SYMBOL(sock_init_data);
 
 void lock_sock_nested(struct sock *sk, int subclass)
 {
+	/* The sk_lock has mutex_lock() semantics here. */
+	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+
 	might_sleep();
 	spin_lock_bh(&sk->sk_lock.slock);
 	if (sk->sk_lock.owned)
 		__lock_sock(sk);
 	sk->sk_lock.owned = 1;
-	spin_unlock(&sk->sk_lock.slock);
-	/*
-	 * The sk_lock has mutex_lock() semantics here:
-	 */
-	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
-	local_bh_enable();
+	spin_unlock_bh(&sk->sk_lock.slock);
 }
 EXPORT_SYMBOL(lock_sock_nested);
 
@@ -3227,24 +3225,35 @@ EXPORT_SYMBOL(release_sock);
  */
 bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
 {
+	/* The sk_lock has mutex_lock() semantics here. */
+	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+
 	might_sleep();
 	spin_lock_bh(&sk->sk_lock.slock);
 
-	if (!sk->sk_lock.owned)
+	if (!sk->sk_lock.owned) {
 		/*
-		 * Note : We must disable BH
+		 * Fast path return with bottom halves disabled and
+		 * sock::sk_lock.slock held.
+		 *
+		 * The 'mutex' is not contended and holding
+		 * sock::sk_lock.slock prevents all other lockers to
+		 * proceed so the corresponding unlock_sock_fast() can
+		 * avoid the slow path of release_sock() completely and
+		 * just release slock.
+		 *
+		 * From a semantical POV this is equivalent to 'acquiring'
+		 * the 'mutex', hence the corresponding lockdep
+		 * mutex_release() has to happen in the fast path of
+		 * unlock_sock_fast().
 		 */
 		return false;
+	}
 
 	__lock_sock(sk);
 	sk->sk_lock.owned = 1;
-	spin_unlock(&sk->sk_lock.slock);
-	/*
-	 * The sk_lock has mutex_lock() semantics here:
-	 */
-	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
 	__acquire(&sk->sk_lock.slock);
-	local_bh_enable();
+	spin_unlock_bh(&sk->sk_lock.slock);
 	return true;
 }
 EXPORT_SYMBOL(lock_sock_fast);
-- 
cgit v1.2.3


From 163957c43d96c2409d9d9d2e94823f7300f6e52c Mon Sep 17 00:00:00 2001
From: Colin Foster <colin.foster@in-advantage.com>
Date: Fri, 17 Sep 2021 08:39:04 -0700
Subject: net: mscc: ocelot: remove buggy and useless write to ANA_PFC_PFC_CFG

A useless write to ANA_PFC_PFC_CFG was left in while refactoring ocelot to
phylink. Since priority flow control is disabled, writing the speed has no
effect.

Further, it was using ethtool.h SPEED_ instead of OCELOT_SPEED_ macros,
which are incorrectly offset for GENMASK.

Lastly, for priority flow control to properly function, some scenarios
would rely on the rate adaptation from the PCS while the MAC speed would
be fixed. So it isn't used, and even if it was, neither "speed" nor
"mac_speed" are necessarily the correct values to be used.

Fixes: e6e12df625f2 ("net: mscc: ocelot: convert to phylink")
Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index c581b955efb3..08be0440af28 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -569,10 +569,6 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
 	ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(speed),
 			   DEV_CLOCK_CFG);
 
-	/* No PFC */
-	ocelot_write_gix(ocelot, ANA_PFC_PFC_CFG_FC_LINK_SPEED(speed),
-			 ANA_PFC_PFC_CFG, port);
-
 	/* Core: Enable port for frame transfer */
 	ocelot_fields_write(ocelot, port,
 			    QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
-- 
cgit v1.2.3


From ba68e9941984792f7e8a7be90b8245eb0d2b4d7b Mon Sep 17 00:00:00 2001
From: Colin Foster <colin.foster@in-advantage.com>
Date: Fri, 17 Sep 2021 08:39:05 -0700
Subject: net: mscc: ocelot: remove buggy duplicate write to DEV_CLOCK_CFG

When updating ocelot to use phylink, a second write to DEV_CLOCK_CFG was
mistakenly left in. It used the variable "speed" which, previously,
would have been assigned a value of OCELOT_SPEED_1000. In phylink the
variable would be SPEED_1000, which is invalid for the
DEV_CLOCK_CFG_LINK_SPEED macro. Remove it as unnecessary and buggy.

Fixes: e6e12df625f2 ("net: mscc: ocelot: convert to phylink")
Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 08be0440af28..729ba826ba17 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -563,12 +563,6 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
 	ocelot_port_writel(ocelot_port, DEV_MAC_ENA_CFG_RX_ENA |
 			   DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG);
 
-	/* Take MAC, Port, Phy (intern) and PCS (SGMII/Serdes) clock out of
-	 * reset
-	 */
-	ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(speed),
-			   DEV_CLOCK_CFG);
-
 	/* Core: Enable port for frame transfer */
 	ocelot_fields_write(ocelot, port,
 			    QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
-- 
cgit v1.2.3


From fdb475838539cb516caeeeaed06b4b5bc62c9179 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 17 Sep 2021 11:20:58 +0200
Subject: net: freescale: drop unneeded MODULE_ALIAS

The MODULE_DEVICE_TABLE already creates proper alias for platform
driver.  Having another MODULE_ALIAS causes the alias to be duplicated.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 80bd5c629fa0..ec87b370bba1 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -4176,5 +4176,4 @@ static struct platform_driver fec_driver = {
 
 module_platform_driver(fec_driver);
 
-MODULE_ALIAS("platform:"DRIVER_NAME);
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From fd292c189a979838622d5e03e15fa688c81dd50b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 17 Sep 2021 17:29:16 +0300
Subject: net: dsa: tear down devlink port regions when tearing down the
 devlink port on error

Commit 86f8b1c01a0a ("net: dsa: Do not make user port errors fatal")
decided it was fine to ignore errors on certain ports that fail to
probe, and go on with the ports that do probe fine.

Commit fb6ec87f7229 ("net: dsa: Fix type was not set for devlink port")
noticed that devlink_port_type_eth_set(dlp, dp->slave); does not get
called, and devlink notices after a timeout of 3600 seconds and prints a
WARN_ON. So it went ahead to unregister the devlink port. And because
there exists an UNUSED port flavour, we actually re-register the devlink
port as UNUSED.

Commit 08156ba430b4 ("net: dsa: Add devlink port regions support to
DSA") added devlink port regions, which are set up by the driver and not
by DSA.

When we trigger the devlink port deregistration and reregistration as
unused, devlink now prints another WARN_ON, from here:

devlink_port_unregister:
	WARN_ON(!list_empty(&devlink_port->region_list));

So the port still has regions, which makes sense, because they were set
up by the driver, and the driver doesn't know we're unregistering the
devlink port.

Somebody needs to tear them down, and optionally (actually it would be
nice, to be consistent) set them up again for the new devlink port.

But DSA's layering stands in our way quite badly here.

The options I've considered are:

1. Introduce a function in devlink to just change a port's type and
   flavour. No dice, devlink keeps a lot of state, it really wants the
   port to not be registered when you set its parameters, so changing
   anything can only be done by destroying what we currently have and
   recreating it.

2. Make DSA cache the parameters passed to dsa_devlink_port_region_create,
   and the region returned, keep those in a list, then when the devlink
   port unregister needs to take place, the existing devlink regions are
   destroyed by DSA, and we replay the creation of new regions using the
   cached parameters. Problem: mv88e6xxx keeps the region pointers in
   chip->ports[port].region, and these will remain stale after DSA frees
   them. There are many things DSA can do, but updating mv88e6xxx's
   private pointers is not one of them.

3. Just let the driver do it (i.e. introduce a very specific method
   called ds->ops->port_reinit_as_unused, which unregisters its devlink
   port devlink regions, then the old devlink port, then registers the
   new one, then the devlink port regions for it). While it does work,
   as opposed to the others, it's pretty horrible from an API
   perspective and we can do better.

4. Introduce a new pair of methods, ->port_setup and ->port_teardown,
   which in the case of mv88e6xxx must register and unregister the
   devlink port regions. Call these 2 methods when the port must be
   reinitialized as unused.

Naturally, I went for the 4th approach.

Fixes: 08156ba430b4 ("net: dsa: Add devlink port regions support to DSA")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c    | 16 +++++++-
 drivers/net/dsa/mv88e6xxx/devlink.c | 73 +++++--------------------------------
 drivers/net/dsa/mv88e6xxx/devlink.h |  6 ++-
 include/net/dsa.h                   |  8 ++++
 net/dsa/dsa2.c                      | 51 +++++++++++++++++++++++---
 5 files changed, 81 insertions(+), 73 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index fb10422d2c33..8ab0be793811 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3071,7 +3071,7 @@ static void mv88e6xxx_teardown(struct dsa_switch *ds)
 {
 	mv88e6xxx_teardown_devlink_params(ds);
 	dsa_devlink_resources_unregister(ds);
-	mv88e6xxx_teardown_devlink_regions(ds);
+	mv88e6xxx_teardown_devlink_regions_global(ds);
 }
 
 static int mv88e6xxx_setup(struct dsa_switch *ds)
@@ -3215,7 +3215,7 @@ unlock:
 	if (err)
 		goto out_resources;
 
-	err = mv88e6xxx_setup_devlink_regions(ds);
+	err = mv88e6xxx_setup_devlink_regions_global(ds);
 	if (err)
 		goto out_params;
 
@@ -3229,6 +3229,16 @@ out_resources:
 	return err;
 }
 
+static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port)
+{
+	return mv88e6xxx_setup_devlink_regions_port(ds, port);
+}
+
+static void mv88e6xxx_port_teardown(struct dsa_switch *ds, int port)
+{
+	mv88e6xxx_teardown_devlink_regions_port(ds, port);
+}
+
 /* prod_id for switch families which do not have a PHY model number */
 static const u16 family_prod_id_table[] = {
 	[MV88E6XXX_FAMILY_6341] = MV88E6XXX_PORT_SWITCH_ID_PROD_6341,
@@ -6116,6 +6126,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
 	.change_tag_protocol	= mv88e6xxx_change_tag_protocol,
 	.setup			= mv88e6xxx_setup,
 	.teardown		= mv88e6xxx_teardown,
+	.port_setup		= mv88e6xxx_port_setup,
+	.port_teardown		= mv88e6xxx_port_teardown,
 	.phylink_validate	= mv88e6xxx_validate,
 	.phylink_mac_link_state	= mv88e6xxx_serdes_pcs_get_state,
 	.phylink_mac_config	= mv88e6xxx_mac_config,
diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c
index 0c0f5ea6680c..381068395c63 100644
--- a/drivers/net/dsa/mv88e6xxx/devlink.c
+++ b/drivers/net/dsa/mv88e6xxx/devlink.c
@@ -647,26 +647,25 @@ static struct mv88e6xxx_region mv88e6xxx_regions[] = {
 	},
 };
 
-static void
-mv88e6xxx_teardown_devlink_regions_global(struct mv88e6xxx_chip *chip)
+void mv88e6xxx_teardown_devlink_regions_global(struct dsa_switch *ds)
 {
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(mv88e6xxx_regions); i++)
 		dsa_devlink_region_destroy(chip->regions[i]);
 }
 
-static void
-mv88e6xxx_teardown_devlink_regions_port(struct mv88e6xxx_chip *chip,
-					int port)
+void mv88e6xxx_teardown_devlink_regions_port(struct dsa_switch *ds, int port)
 {
+	struct mv88e6xxx_chip *chip = ds->priv;
+
 	dsa_devlink_region_destroy(chip->ports[port].region);
 }
 
-static int mv88e6xxx_setup_devlink_regions_port(struct dsa_switch *ds,
-						struct mv88e6xxx_chip *chip,
-						int port)
+int mv88e6xxx_setup_devlink_regions_port(struct dsa_switch *ds, int port)
 {
+	struct mv88e6xxx_chip *chip = ds->priv;
 	struct devlink_region *region;
 
 	region = dsa_devlink_port_region_create(ds,
@@ -681,40 +680,10 @@ static int mv88e6xxx_setup_devlink_regions_port(struct dsa_switch *ds,
 	return 0;
 }
 
-static void
-mv88e6xxx_teardown_devlink_regions_ports(struct mv88e6xxx_chip *chip)
-{
-	int port;
-
-	for (port = 0; port < mv88e6xxx_num_ports(chip); port++)
-		mv88e6xxx_teardown_devlink_regions_port(chip, port);
-}
-
-static int mv88e6xxx_setup_devlink_regions_ports(struct dsa_switch *ds,
-						 struct mv88e6xxx_chip *chip)
-{
-	int port;
-	int err;
-
-	for (port = 0; port < mv88e6xxx_num_ports(chip); port++) {
-		err = mv88e6xxx_setup_devlink_regions_port(ds, chip, port);
-		if (err)
-			goto out;
-	}
-
-	return 0;
-
-out:
-	while (port-- > 0)
-		mv88e6xxx_teardown_devlink_regions_port(chip, port);
-
-	return err;
-}
-
-static int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds,
-						  struct mv88e6xxx_chip *chip)
+int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds)
 {
 	bool (*cond)(struct mv88e6xxx_chip *chip);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	struct devlink_region_ops *ops;
 	struct devlink_region *region;
 	u64 size;
@@ -753,30 +722,6 @@ out:
 	return PTR_ERR(region);
 }
 
-int mv88e6xxx_setup_devlink_regions(struct dsa_switch *ds)
-{
-	struct mv88e6xxx_chip *chip = ds->priv;
-	int err;
-
-	err = mv88e6xxx_setup_devlink_regions_global(ds, chip);
-	if (err)
-		return err;
-
-	err = mv88e6xxx_setup_devlink_regions_ports(ds, chip);
-	if (err)
-		mv88e6xxx_teardown_devlink_regions_global(chip);
-
-	return err;
-}
-
-void mv88e6xxx_teardown_devlink_regions(struct dsa_switch *ds)
-{
-	struct mv88e6xxx_chip *chip = ds->priv;
-
-	mv88e6xxx_teardown_devlink_regions_ports(chip);
-	mv88e6xxx_teardown_devlink_regions_global(chip);
-}
-
 int mv88e6xxx_devlink_info_get(struct dsa_switch *ds,
 			       struct devlink_info_req *req,
 			       struct netlink_ext_ack *extack)
diff --git a/drivers/net/dsa/mv88e6xxx/devlink.h b/drivers/net/dsa/mv88e6xxx/devlink.h
index 3d72db3dcf95..65ce6a6858b9 100644
--- a/drivers/net/dsa/mv88e6xxx/devlink.h
+++ b/drivers/net/dsa/mv88e6xxx/devlink.h
@@ -12,8 +12,10 @@ int mv88e6xxx_devlink_param_get(struct dsa_switch *ds, u32 id,
 				struct devlink_param_gset_ctx *ctx);
 int mv88e6xxx_devlink_param_set(struct dsa_switch *ds, u32 id,
 				struct devlink_param_gset_ctx *ctx);
-int mv88e6xxx_setup_devlink_regions(struct dsa_switch *ds);
-void mv88e6xxx_teardown_devlink_regions(struct dsa_switch *ds);
+int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds);
+void mv88e6xxx_teardown_devlink_regions_global(struct dsa_switch *ds);
+int mv88e6xxx_setup_devlink_regions_port(struct dsa_switch *ds, int port);
+void mv88e6xxx_teardown_devlink_regions_port(struct dsa_switch *ds, int port);
 
 int mv88e6xxx_devlink_info_get(struct dsa_switch *ds,
 			       struct devlink_info_req *req,
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6e29c0e080f6..d784e76113b8 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -585,8 +585,16 @@ struct dsa_switch_ops {
 	int	(*change_tag_protocol)(struct dsa_switch *ds, int port,
 				       enum dsa_tag_protocol proto);
 
+	/* Optional switch-wide initialization and destruction methods */
 	int	(*setup)(struct dsa_switch *ds);
 	void	(*teardown)(struct dsa_switch *ds);
+
+	/* Per-port initialization and destruction methods. Mandatory if the
+	 * driver registers devlink port regions, optional otherwise.
+	 */
+	int	(*port_setup)(struct dsa_switch *ds, int port);
+	void	(*port_teardown)(struct dsa_switch *ds, int port);
+
 	u32	(*get_phy_flags)(struct dsa_switch *ds, int port);
 
 	/*
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index fa88e58705f0..f14897d9b31d 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -429,6 +429,7 @@ static int dsa_port_setup(struct dsa_port *dp)
 {
 	struct devlink_port *dlp = &dp->devlink_port;
 	bool dsa_port_link_registered = false;
+	struct dsa_switch *ds = dp->ds;
 	bool dsa_port_enabled = false;
 	int err = 0;
 
@@ -438,6 +439,12 @@ static int dsa_port_setup(struct dsa_port *dp)
 	INIT_LIST_HEAD(&dp->fdbs);
 	INIT_LIST_HEAD(&dp->mdbs);
 
+	if (ds->ops->port_setup) {
+		err = ds->ops->port_setup(ds, dp->index);
+		if (err)
+			return err;
+	}
+
 	switch (dp->type) {
 	case DSA_PORT_TYPE_UNUSED:
 		dsa_port_disable(dp);
@@ -480,8 +487,11 @@ static int dsa_port_setup(struct dsa_port *dp)
 		dsa_port_disable(dp);
 	if (err && dsa_port_link_registered)
 		dsa_port_link_unregister_of(dp);
-	if (err)
+	if (err) {
+		if (ds->ops->port_teardown)
+			ds->ops->port_teardown(ds, dp->index);
 		return err;
+	}
 
 	dp->setup = true;
 
@@ -533,11 +543,15 @@ static int dsa_port_devlink_setup(struct dsa_port *dp)
 static void dsa_port_teardown(struct dsa_port *dp)
 {
 	struct devlink_port *dlp = &dp->devlink_port;
+	struct dsa_switch *ds = dp->ds;
 	struct dsa_mac_addr *a, *tmp;
 
 	if (!dp->setup)
 		return;
 
+	if (ds->ops->port_teardown)
+		ds->ops->port_teardown(ds, dp->index);
+
 	devlink_port_type_clear(dlp);
 
 	switch (dp->type) {
@@ -581,6 +595,36 @@ static void dsa_port_devlink_teardown(struct dsa_port *dp)
 	dp->devlink_port_setup = false;
 }
 
+/* Destroy the current devlink port, and create a new one which has the UNUSED
+ * flavour. At this point, any call to ds->ops->port_setup has been already
+ * balanced out by a call to ds->ops->port_teardown, so we know that any
+ * devlink port regions the driver had are now unregistered. We then call its
+ * ds->ops->port_setup again, in order for the driver to re-create them on the
+ * new devlink port.
+ */
+static int dsa_port_reinit_as_unused(struct dsa_port *dp)
+{
+	struct dsa_switch *ds = dp->ds;
+	int err;
+
+	dsa_port_devlink_teardown(dp);
+	dp->type = DSA_PORT_TYPE_UNUSED;
+	err = dsa_port_devlink_setup(dp);
+	if (err)
+		return err;
+
+	if (ds->ops->port_setup) {
+		/* On error, leave the devlink port registered,
+		 * dsa_switch_teardown will clean it up later.
+		 */
+		err = ds->ops->port_setup(ds, dp->index);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int dsa_devlink_info_get(struct devlink *dl,
 				struct devlink_info_req *req,
 				struct netlink_ext_ack *extack)
@@ -938,12 +982,9 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
 	list_for_each_entry(dp, &dst->ports, list) {
 		err = dsa_port_setup(dp);
 		if (err) {
-			dsa_port_devlink_teardown(dp);
-			dp->type = DSA_PORT_TYPE_UNUSED;
-			err = dsa_port_devlink_setup(dp);
+			err = dsa_port_reinit_as_unused(dp);
 			if (err)
 				goto teardown;
-			continue;
 		}
 	}
 
-- 
cgit v1.2.3


From 029497e66bdc762e001880e4c85a91f35a54b1e2 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@gmail.com>
Date: Sun, 19 Sep 2021 13:57:25 +0200
Subject: net: bgmac-bcma: handle deferred probe error due to mac-address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Due to the inclusion of nvmem handling into the mac-address getter
function of_get_mac_address() by
commit d01f449c008a ("of_net: add NVMEM support to of_get_mac_address")
it is now possible to get a -EPROBE_DEFER return code, which caused
bgmac to assign a random Ethernet address.

This exact issue happened on my Meraki MR32. The nvmem provider is
an EEPROM (at24c64) which gets instantiated once the module
driver is loaded... This happens once the filesystem becomes available.

With this patch, bgmac_probe() will propagate the -EPROBE_DEFER error.
Then the driver subsystem will reschedule the probe at a later time.

Cc: Petr Štetiar <ynezz@true.cz>
Cc: Michael Walle <michael@walle.cc>
Fixes: d01f449c008a ("of_net: add NVMEM support to of_get_mac_address")
Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bgmac-bcma.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index 85fa0ab7201c..9513cfb5ba58 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -129,6 +129,8 @@ static int bgmac_probe(struct bcma_device *core)
 	bcma_set_drvdata(core, bgmac);
 
 	err = of_get_mac_address(bgmac->dev->of_node, bgmac->net_dev->dev_addr);
+	if (err == -EPROBE_DEFER)
+		return err;
 
 	/* If no MAC address assigned via device tree, check SPROM */
 	if (err) {
-- 
cgit v1.2.3


From 48514a22333099b93c33ea1c7c97dad3f7a611ce Mon Sep 17 00:00:00 2001
From: Shuah Khan <skhan@linuxfoundation.org>
Date: Fri, 17 Sep 2021 13:26:14 -0600
Subject: selftests: net: af_unix: Fix incorrect args in test result msg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the args to fprintf(). Splitting the message ends up passing
incorrect arg for "sigurg %d" and an extra arg overall. The test
result message ends up incorrect.

test_unix_oob.c: In function ‘main’:
test_unix_oob.c:274:43: warning: format ‘%d’ expects argument of type ‘int’, but argument 3 has type ‘char *’ [-Wformat=]
  274 |   fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ",
      |                                          ~^
      |                                           |
      |                                           int
      |                                          %s
  275 |   "atmark %d\n", signal_recvd, len, oob, atmark);
      |   ~~~~~~~~~~~~~
      |   |
      |   char *
test_unix_oob.c:274:19: warning: too many arguments for format [-Wformat-extra-args]
  274 |   fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ",

Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/af_unix/test_unix_oob.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
index 0f3e3763f4f8..3dece8b29253 100644
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -271,8 +271,9 @@ main(int argc, char **argv)
 	read_oob(pfd, &oob);
 
 	if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
-		fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ",
-		"atmark %d\n", signal_recvd, len, oob, atmark);
+		fprintf(stderr,
+			"Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
+			signal_recvd, len, oob, atmark);
 		die(1);
 	}
 
-- 
cgit v1.2.3


From 72a3c58d18fd780eecd80178bb2132ce741a0a74 Mon Sep 17 00:00:00 2001
From: Lama Kayal <lkayal@nvidia.com>
Date: Sun, 19 Sep 2021 14:55:45 +0300
Subject: net/mlx4_en: Resolve bad operstate value

Any link state change that's done prior to net device registration
isn't reflected on the state, thus the operational state is left
obsolete, with 'UNKNOWN' status.

To resolve the issue, query link state from FW upon open operations
to ensure operational state is updated.

Fixes: c27a02cd94d6 ("mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC")
Signed-off-by: Lama Kayal <lkayal@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 47 ++++++++++++++++----------
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  1 -
 2 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index a2f61a87cef8..35154635ec3a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1269,7 +1269,6 @@ static void mlx4_en_do_set_rx_mode(struct work_struct *work)
 	if (!netif_carrier_ok(dev)) {
 		if (!mlx4_en_QUERY_PORT(mdev, priv->port)) {
 			if (priv->port_state.link_state) {
-				priv->last_link_state = MLX4_DEV_EVENT_PORT_UP;
 				netif_carrier_on(dev);
 				en_dbg(LINK, priv, "Link Up\n");
 			}
@@ -1557,26 +1556,36 @@ static void mlx4_en_service_task(struct work_struct *work)
 	mutex_unlock(&mdev->state_lock);
 }
 
-static void mlx4_en_linkstate(struct work_struct *work)
+static void mlx4_en_linkstate(struct mlx4_en_priv *priv)
+{
+	struct mlx4_en_port_state *port_state = &priv->port_state;
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct net_device *dev = priv->dev;
+	bool up;
+
+	if (mlx4_en_QUERY_PORT(mdev, priv->port))
+		port_state->link_state = MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN;
+
+	up = port_state->link_state == MLX4_PORT_STATE_DEV_EVENT_PORT_UP;
+	if (up == netif_carrier_ok(dev))
+		netif_carrier_event(dev);
+	if (!up) {
+		en_info(priv, "Link Down\n");
+		netif_carrier_off(dev);
+	} else {
+		en_info(priv, "Link Up\n");
+		netif_carrier_on(dev);
+	}
+}
+
+static void mlx4_en_linkstate_work(struct work_struct *work)
 {
 	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
 						 linkstate_task);
 	struct mlx4_en_dev *mdev = priv->mdev;
-	int linkstate = priv->link_state;
 
 	mutex_lock(&mdev->state_lock);
-	/* If observable port state changed set carrier state and
-	 * report to system log */
-	if (priv->last_link_state != linkstate) {
-		if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) {
-			en_info(priv, "Link Down\n");
-			netif_carrier_off(priv->dev);
-		} else {
-			en_info(priv, "Link Up\n");
-			netif_carrier_on(priv->dev);
-		}
-	}
-	priv->last_link_state = linkstate;
+	mlx4_en_linkstate(priv);
 	mutex_unlock(&mdev->state_lock);
 }
 
@@ -2079,9 +2088,11 @@ static int mlx4_en_open(struct net_device *dev)
 	mlx4_en_clear_stats(dev);
 
 	err = mlx4_en_start_port(dev);
-	if (err)
+	if (err) {
 		en_err(priv, "Failed starting port:%d\n", priv->port);
-
+		goto out;
+	}
+	mlx4_en_linkstate(priv);
 out:
 	mutex_unlock(&mdev->state_lock);
 	return err;
@@ -3168,7 +3179,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	spin_lock_init(&priv->stats_lock);
 	INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode);
 	INIT_WORK(&priv->restart_task, mlx4_en_restart);
-	INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate);
+	INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate_work);
 	INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats);
 	INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task);
 #ifdef CONFIG_RFS_ACCEL
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index f3d1a20201ef..6bf558c5ec10 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -552,7 +552,6 @@ struct mlx4_en_priv {
 
 	struct mlx4_hwq_resources res;
 	int link_state;
-	int last_link_state;
 	bool port_up;
 	int port;
 	int registered;
-- 
cgit v1.2.3


From e30cd812dffadc58241ae378e48728e6a161becd Mon Sep 17 00:00:00 2001
From: Shuah Khan <skhan@linuxfoundation.org>
Date: Fri, 17 Sep 2021 15:53:56 -0600
Subject: selftests: net: af_unix: Fix makefile to use TEST_GEN_PROGS

Makefile uses TEST_PROGS instead of TEST_GEN_PROGS to define
executables. TEST_PROGS is for shell scripts that need to be
installed and run by the common lib.mk framework. The common
framework doesn't touch TEST_PROGS when it does build and clean.

As a result "make kselftest-clean" and "make clean" fail to remove
executables. Run and install work because the common framework runs
and installs TEST_PROGS. Build works because the Makefile defines
"all" rule which is unnecessary if TEST_GEN_PROGS is used.

Use TEST_GEN_PROGS so the common framework can handle build/run/
install/clean properly.

Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/af_unix/Makefile | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index cfc7f4f97fd1..df341648f818 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,5 +1,2 @@
-##TEST_GEN_FILES := test_unix_oob
-TEST_PROGS := test_unix_oob
+TEST_GEN_PROGS := test_unix_oob
 include ../../lib.mk
-
-all: $(TEST_PROGS)
-- 
cgit v1.2.3


From 3765996e4f0b8a755cab215a08df744490c76052 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Sat, 18 Sep 2021 16:52:32 +0800
Subject: napi: fix race inside napi_enable

The sequence below leaves napi.state with NAPI_STATE_SCHED set while
the napi is not on the poll_list, which causes napi_disable() to get stuck.

The prefix "NAPI_STATE_" is removed in the figure below, and
NAPI_STATE_HASHED is ignored in napi.state.

                      CPU0       |                   CPU1       | napi.state
===============================================================================
napi_disable()                   |                              | SCHED | NPSVC
napi_enable()                    |                              |
{                                |                              |
    smp_mb__before_atomic();     |                              |
    clear_bit(SCHED, &n->state); |                              | NPSVC
                                 | napi_schedule_prep()         | SCHED | NPSVC
                                 | napi_poll()                  |
                                 |   napi_complete_done()       |
                                 |   {                          |
                                 |      if (n->state & (NPSVC | | (1)
                                 |               _BUSY_POLL)))  |
                                 |           return false;      |
                                 |     ................         |
                                 |   }                          | SCHED | NPSVC
                                 |                              |
    clear_bit(NPSVC, &n->state); |                              | SCHED
}                                |                              |
                                 |                              |
napi_schedule_prep()             |                              | SCHED | MISSED (2)

(1) Returns directly here because NAPI_STATE_NPSVC is set.
(2) NAPI_STATE_SCHED is already set, so napi.poll_list is not added to
    sd->poll_list.

Since NAPI_STATE_SCHED is already set and the napi is not on the
sd->poll_list queue, NAPI_STATE_SCHED can never be cleared and stays
set forever.

1. This will cause this queue to no longer receive packets.
2. If napi_disable() is then called while holding rtnl_lock, it never
   returns and rtnl_lock stays held, affecting the overall
   system.

This patch uses cmpxchg to implement napi_enable(), which ensures that
there is no race caused by clearing the two bits separately.

Fixes: 2d8bff12699abc ("netpoll: Close race condition between poll_one_napi and napi_disable")
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Reviewed-by: Dust Li <dust.li@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 74fd402d26dd..7ee9fecd3aff 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6923,12 +6923,16 @@ EXPORT_SYMBOL(napi_disable);
  */
 void napi_enable(struct napi_struct *n)
 {
-	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
-	smp_mb__before_atomic();
-	clear_bit(NAPI_STATE_SCHED, &n->state);
-	clear_bit(NAPI_STATE_NPSVC, &n->state);
-	if (n->dev->threaded && n->thread)
-		set_bit(NAPI_STATE_THREADED, &n->state);
+	unsigned long val, new;
+
+	do {
+		val = READ_ONCE(n->state);
+		BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
+
+		new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC);
+		if (n->dev->threaded && n->thread)
+			new |= NAPIF_STATE_THREADED;
+	} while (cmpxchg(&n->state, val, new) != val);
 }
 EXPORT_SYMBOL(napi_enable);
 
-- 
cgit v1.2.3


From 563f23b002534176f49524b5ca0e1d94d8906c40 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Fri, 17 Sep 2021 16:02:18 +0300
Subject: nexthop: Fix division by zero while replacing a resilient group

The resilient nexthop group torture tests in fib_nexthops.sh exposed a
possible division by zero while replacing a resilient group [1]. The
division by zero occurs when the data path sees a resilient nexthop
group with zero buckets.

The tests replace a resilient nexthop group in a loop while traffic is
forwarded through it. The tests do not specify the number of buckets
while performing the replacement, resulting in the kernel allocating a
stub resilient table (i.e., 'struct nh_res_table') with zero buckets.

This table should never be visible to the data path, but the old nexthop
group (i.e., 'oldg') might still be used by the data path when the stub
table is assigned to it.

Fix this by only assigning the stub table to the old nexthop group after
making sure the group is no longer used by the data path.

Tested with fib_nexthops.sh:

Tests passed: 222
Tests failed:   0

[1]
 divide error: 0000 [#1] PREEMPT SMP KASAN
 CPU: 0 PID: 1850 Comm: ping Not tainted 5.14.0-custom-10271-ga86eb53057fe #1107
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-4.fc34 04/01/2014
 RIP: 0010:nexthop_select_path+0x2d2/0x1a80
[...]
 Call Trace:
  fib_select_multipath+0x79b/0x1530
  fib_select_path+0x8fb/0x1c10
  ip_route_output_key_hash_rcu+0x1198/0x2da0
  ip_route_output_key_hash+0x190/0x340
  ip_route_output_flow+0x21/0x120
  raw_sendmsg+0x91d/0x2e10
  inet_sendmsg+0x9e/0xe0
  __sys_sendto+0x23d/0x360
  __x64_sys_sendto+0xe1/0x1b0
  do_syscall_64+0x35/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xae

Cc: stable@vger.kernel.org
Fixes: 283a72a5599e ("nexthop: Add implementation of resilient next-hop groups")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 75ca4b6e484f..0e75fd3e57b4 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1982,6 +1982,8 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
 	rcu_assign_pointer(old->nh_grp, newg);
 
 	if (newg->resilient) {
+		/* Make sure concurrent readers are not using 'oldg' anymore. */
+		synchronize_net();
 		rcu_assign_pointer(oldg->res_table, tmp_table);
 		rcu_assign_pointer(oldg->spare->res_table, tmp_table);
 	}
-- 
cgit v1.2.3


From 5bed8b0704c9ecccc8f4a2c377d7c8e21090a82e Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 20 Sep 2021 02:51:52 -0400
Subject: bnxt_en: Fix TX timeout when TX ring size is set to the smallest

The smallest TX ring size we support must fit a TX SKB with MAX_SKB_FRAGS
+ 1.  Because the first TX BD for a packet is always a long TX BD, we
need an extra TX BD to fit this packet.  Define BNXT_MIN_TX_DESC_CNT with
this value to make this more clear.  The current code uses a minimum
that is off by 1.  Fix it using this constant.

The tx_wake_thresh to determine when to wake up the TX queue is half the
ring size but we must have at least BNXT_MIN_TX_DESC_CNT for the next
packet which may have maximum fragments.  So the comparison of the
available TX BDs with tx_wake_thresh should be >= instead of > in the
current code.  Otherwise, at the smallest ring size, we will never wake
up the TX queue and will cause TX timeout.

Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.")
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         | 8 ++++----
 drivers/net/ethernet/broadcom/bnxt/bnxt.h         | 5 +++++
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +-
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 037767b370d5..62f84cc91e4d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -391,7 +391,7 @@ static bool bnxt_txr_netif_try_stop_queue(struct bnxt *bp,
 	 * netif_tx_queue_stopped().
 	 */
 	smp_mb();
-	if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) {
+	if (bnxt_tx_avail(bp, txr) >= bp->tx_wake_thresh) {
 		netif_tx_wake_queue(txq);
 		return false;
 	}
@@ -764,7 +764,7 @@ next_tx_int:
 	smp_mb();
 
 	if (unlikely(netif_tx_queue_stopped(txq)) &&
-	    bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh &&
+	    bnxt_tx_avail(bp, txr) >= bp->tx_wake_thresh &&
 	    READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING)
 		netif_tx_wake_queue(txq);
 }
@@ -2416,7 +2416,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 		if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) {
 			tx_pkts++;
 			/* return full budget so NAPI will complete. */
-			if (unlikely(tx_pkts > bp->tx_wake_thresh)) {
+			if (unlikely(tx_pkts >= bp->tx_wake_thresh)) {
 				rx_pkts = budget;
 				raw_cons = NEXT_RAW_CMP(raw_cons);
 				if (budget)
@@ -3640,7 +3640,7 @@ static int bnxt_init_tx_rings(struct bnxt *bp)
 	u16 i;
 
 	bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2,
-				   MAX_SKB_FRAGS + 1);
+				   BNXT_MIN_TX_DESC_CNT);
 
 	for (i = 0; i < bp->tx_nr_rings; i++) {
 		struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index ec046e7a2484..19fe6478e9b4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -629,6 +629,11 @@ struct nqe_cn {
 #define BNXT_MAX_RX_JUM_DESC_CNT	(RX_DESC_CNT * MAX_RX_AGG_PAGES - 1)
 #define BNXT_MAX_TX_DESC_CNT		(TX_DESC_CNT * MAX_TX_PAGES - 1)
 
+/* Minimum TX BDs for a TX packet with MAX_SKB_FRAGS + 1.  We need one extra
+ * BD because the first TX BD is always a long BD.
+ */
+#define BNXT_MIN_TX_DESC_CNT		(MAX_SKB_FRAGS + 2)
+
 #define RX_RING(x)	(((x) & ~(RX_DESC_CNT - 1)) >> (BNXT_PAGE_SHIFT - 4))
 #define RX_IDX(x)	((x) & (RX_DESC_CNT - 1))
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index b056e3c29bbd..7260910e75fb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -798,7 +798,7 @@ static int bnxt_set_ringparam(struct net_device *dev,
 
 	if ((ering->rx_pending > BNXT_MAX_RX_DESC_CNT) ||
 	    (ering->tx_pending > BNXT_MAX_TX_DESC_CNT) ||
-	    (ering->tx_pending <= MAX_SKB_FRAGS))
+	    (ering->tx_pending < BNXT_MIN_TX_DESC_CNT))
 		return -EINVAL;
 
 	if (netif_running(dev))
-- 
cgit v1.2.3


From 42a99a0be307562c1bfef32bad8f89aa3c428edd Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 20 Sep 2021 11:57:49 +0200
Subject: ptp: ocp: add COMMON_CLK dependency

Without CONFIG_COMMON_CLK, this fails to link:

arm-linux-gnueabi-ld: drivers/ptp/ptp_ocp.o: in function `ptp_ocp_register_i2c':
ptp_ocp.c:(.text+0xcc0): undefined reference to `__clk_hw_register_fixed_rate'
arm-linux-gnueabi-ld: ptp_ocp.c:(.text+0xcf4): undefined reference to `devm_clk_hw_register_clkdev'
arm-linux-gnueabi-ld: drivers/ptp/ptp_ocp.o: in function `ptp_ocp_detach':
ptp_ocp.c:(.text+0x1c24): undefined reference to `clk_hw_unregister_fixed_rate'

Fixes: a7e1abad13f3 ("ptp: Add clock driver for the OpenCompute TimeCard.")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index f02bedf41264..458218f88c5e 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -174,6 +174,7 @@ config PTP_1588_CLOCK_OCP
 	depends on I2C && MTD
 	depends on SERIAL_8250
 	depends on !S390
+	depends on COMMON_CLK
 	select NET_DEVLINK
 	help
 	  This driver adds support for an OpenCompute time card.
-- 
cgit v1.2.3


From e184cec5e29d8eb3c3435b12a9074b75e2d69e4a Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Wed, 15 Sep 2021 21:52:06 +0800
Subject: net: hns3: fix change RSS 'hfunc' ineffective issue

When the user changes the RSS 'hfunc' without setting the RSS 'hkey'
via the ethtool -X command, the driver ignores the 'hfunc' because the
hkey is NULL. That is unreasonable, so fix it.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Fixes: 374ad291762a ("net: hns3: Add RSS general configuration support for VF")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 45 ++++++++++++-------
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  | 52 ++++++++++++++--------
 2 files changed, 64 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index f1e46ba799f9..36c8741445e8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4741,6 +4741,24 @@ static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir,
 	return 0;
 }
 
+static int hclge_parse_rss_hfunc(struct hclge_vport *vport, const u8 hfunc,
+				 u8 *hash_algo)
+{
+	switch (hfunc) {
+	case ETH_RSS_HASH_TOP:
+		*hash_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
+		return 0;
+	case ETH_RSS_HASH_XOR:
+		*hash_algo = HCLGE_RSS_HASH_ALGO_SIMPLE;
+		return 0;
+	case ETH_RSS_HASH_NO_CHANGE:
+		*hash_algo = vport->rss_algo;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
 			 const  u8 *key, const  u8 hfunc)
 {
@@ -4750,30 +4768,27 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
 	u8 hash_algo;
 	int ret, i;
 
+	ret = hclge_parse_rss_hfunc(vport, hfunc, &hash_algo);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "invalid hfunc type %u\n", hfunc);
+		return ret;
+	}
+
 	/* Set the RSS Hash Key if specififed by the user */
 	if (key) {
-		switch (hfunc) {
-		case ETH_RSS_HASH_TOP:
-			hash_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
-			break;
-		case ETH_RSS_HASH_XOR:
-			hash_algo = HCLGE_RSS_HASH_ALGO_SIMPLE;
-			break;
-		case ETH_RSS_HASH_NO_CHANGE:
-			hash_algo = vport->rss_algo;
-			break;
-		default:
-			return -EINVAL;
-		}
-
 		ret = hclge_set_rss_algo_key(hdev, hash_algo, key);
 		if (ret)
 			return ret;
 
 		/* Update the shadow RSS key with user specified qids */
 		memcpy(vport->rss_hash_key, key, HCLGE_RSS_KEY_SIZE);
-		vport->rss_algo = hash_algo;
+	} else {
+		ret = hclge_set_rss_algo_key(hdev, hash_algo,
+					     vport->rss_hash_key);
+		if (ret)
+			return ret;
 	}
+	vport->rss_algo = hash_algo;
 
 	/* Update the shadow RSS table with user specified qids */
 	for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index a69e892277b3..5fdac8685f95 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -816,40 +816,56 @@ static int hclgevf_get_rss(struct hnae3_handle *handle, u32 *indir, u8 *key,
 	return 0;
 }
 
+static int hclgevf_parse_rss_hfunc(struct hclgevf_dev *hdev, const u8 hfunc,
+				   u8 *hash_algo)
+{
+	switch (hfunc) {
+	case ETH_RSS_HASH_TOP:
+		*hash_algo = HCLGEVF_RSS_HASH_ALGO_TOEPLITZ;
+		return 0;
+	case ETH_RSS_HASH_XOR:
+		*hash_algo = HCLGEVF_RSS_HASH_ALGO_SIMPLE;
+		return 0;
+	case ETH_RSS_HASH_NO_CHANGE:
+		*hash_algo = hdev->rss_cfg.hash_algo;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static int hclgevf_set_rss(struct hnae3_handle *handle, const u32 *indir,
 			   const u8 *key, const u8 hfunc)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg;
+	u8 hash_algo;
 	int ret, i;
 
 	if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
+		ret = hclgevf_parse_rss_hfunc(hdev, hfunc, &hash_algo);
+		if (ret)
+			return ret;
+
 		/* Set the RSS Hash Key if specififed by the user */
 		if (key) {
-			switch (hfunc) {
-			case ETH_RSS_HASH_TOP:
-				rss_cfg->hash_algo =
-					HCLGEVF_RSS_HASH_ALGO_TOEPLITZ;
-				break;
-			case ETH_RSS_HASH_XOR:
-				rss_cfg->hash_algo =
-					HCLGEVF_RSS_HASH_ALGO_SIMPLE;
-				break;
-			case ETH_RSS_HASH_NO_CHANGE:
-				break;
-			default:
-				return -EINVAL;
-			}
-
-			ret = hclgevf_set_rss_algo_key(hdev, rss_cfg->hash_algo,
-						       key);
-			if (ret)
+			ret = hclgevf_set_rss_algo_key(hdev, hash_algo, key);
+			if (ret) {
+				dev_err(&hdev->pdev->dev,
+					"invalid hfunc type %u\n", hfunc);
 				return ret;
+			}
 
 			/* Update the shadow RSS key with user specified qids */
 			memcpy(rss_cfg->rss_hash_key, key,
 			       HCLGEVF_RSS_KEY_SIZE);
+		} else {
+			ret = hclgevf_set_rss_algo_key(hdev, hash_algo,
+						       rss_cfg->rss_hash_key);
+			if (ret)
+				return ret;
 		}
+		rss_cfg->hash_algo = hash_algo;
 	}
 
 	/* update the shadow RSS table with user specified qids */
-- 
cgit v1.2.3


From 91bc0d5272d3a4dc3d4fd2a74387c7e7361bbe96 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Wed, 15 Sep 2021 21:52:07 +0800
Subject: net: hns3: fix inconsistent vf id print

The vf id from ethtool is incremented by 1 before being configured in
the driver, so it is necessary to subtract 1 when printing it in order
to stay consistent with the user's configuration.

Fixes: dd74f815dd41 ("net: hns3: Add support for rule add/delete for flow director")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 36c8741445e8..c0f25ea043b0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6642,10 +6642,13 @@ static int hclge_fd_parse_ring_cookie(struct hclge_dev *hdev, u64 ring_cookie,
 		u8 vf = ethtool_get_flow_spec_ring_vf(ring_cookie);
 		u16 tqps;
 
+		/* To keep consistent with user's configuration, minus 1 when
+		 * printing 'vf', because vf id from ethtool is added 1 for vf.
+		 */
 		if (vf > hdev->num_req_vfs) {
 			dev_err(&hdev->pdev->dev,
-				"Error: vf id (%u) > max vf num (%u)\n",
-				vf, hdev->num_req_vfs);
+				"Error: vf id (%u) should be less than %u\n",
+				vf - 1, hdev->num_req_vfs);
 			return -EINVAL;
 		}
 
-- 
cgit v1.2.3


From 311c0aaa9b4bb8dc65f22634e15963316b17c921 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Wed, 15 Sep 2021 21:52:08 +0800
Subject: net: hns3: fix misuse vf id and vport id in some logs

vport_id covers both the PF and the VFs: vport_id = 0 means the PF, and
other values mean VFs. So the actual vf id is equal to vport_id minus 1.

Some logs labelled as VF logs actually refer to a vport, and some logs of
the vf id actually print the vport id, so this patch fixes them.

Fixes: ac887be5b0fe ("net: hns3: change print level of RAS error log from warning to error")
Fixes: adcf738b804b ("net: hns3: cleanup some print format warning")
Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c  |  8 ++++----
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 10 ++++++----
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c  |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c   |  2 +-
 4 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 718c16d686fa..bb9b026ae88e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -2445,12 +2445,12 @@ static void hclge_handle_over_8bd_err(struct hclge_dev *hdev,
 		return;
 	}
 
-	dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vf_id(%u), queue_id(%u)\n",
+	dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vport(%u), queue_id(%u)\n",
 		vf_id, q_id);
 
 	if (vf_id) {
 		if (vf_id >= hdev->num_alloc_vport) {
-			dev_err(dev, "invalid vf id(%u)\n", vf_id);
+			dev_err(dev, "invalid vport(%u)\n", vf_id);
 			return;
 		}
 
@@ -2463,8 +2463,8 @@ static void hclge_handle_over_8bd_err(struct hclge_dev *hdev,
 
 		ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]);
 		if (ret)
-			dev_err(dev, "inform reset to vf(%u) failed %d!\n",
-				hdev->vport->vport_id, ret);
+			dev_err(dev, "inform reset to vport(%u) failed %d!\n",
+				vf_id, ret);
 	} else {
 		set_bit(HNAE3_FUNC_RESET, reset_requests);
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c0f25ea043b0..afc88a41a89c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3661,7 +3661,8 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset)
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
 				"set vf(%u) rst failed %d!\n",
-				vport->vport_id, ret);
+				vport->vport_id - HCLGE_VF_VPORT_START_NUM,
+				ret);
 			return ret;
 		}
 
@@ -3676,7 +3677,8 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset)
 		if (ret)
 			dev_warn(&hdev->pdev->dev,
 				 "inform reset to vf(%u) failed %d!\n",
-				 vport->vport_id, ret);
+				 vport->vport_id - HCLGE_VF_VPORT_START_NUM,
+				 ret);
 	}
 
 	return 0;
@@ -11467,11 +11469,11 @@ static void hclge_clear_resetting_state(struct hclge_dev *hdev)
 		struct hclge_vport *vport = &hdev->vport[i];
 		int ret;
 
-		 /* Send cmd to clear VF's FUNC_RST_ING */
+		 /* Send cmd to clear vport's FUNC_RST_ING */
 		ret = hclge_set_vf_rst(hdev, vport->vport_id, false);
 		if (ret)
 			dev_warn(&hdev->pdev->dev,
-				 "clear vf(%u) rst failed %d!\n",
+				 "clear vport(%u) rst failed %d!\n",
 				 vport->vport_id, ret);
 	}
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 2ce5302c5956..07aa6ada4fdb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -566,7 +566,7 @@ static int hclge_reset_vf(struct hclge_vport *vport)
 	struct hclge_dev *hdev = vport->back;
 
 	dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %u!",
-		 vport->vport_id);
+		 vport->vport_id - HCLGE_VF_VPORT_START_NUM);
 
 	return hclge_func_reset_cmd(hdev, vport->vport_id);
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 78d5bf1ea561..44618cc4cca1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -581,7 +581,7 @@ int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate)
 		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
-				"vf%u, qs%u failed to set tx_rate:%d, ret=%d\n",
+				"vport%u, qs%u failed to set tx_rate:%d, ret=%d\n",
 				vport->vport_id, shap_cfg_cmd->qs_id,
 				max_tx_rate, ret);
 			return ret;
-- 
cgit v1.2.3


From 63b1279d9905100a14da9e043de7b28e99dba3f8 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Wed, 15 Sep 2021 21:52:09 +0800
Subject: net: hns3: check queue id range before using

The input parameters may not be reliable. Before using the
queue id, we should check this parameter. Otherwise, memory
overwriting may occur.

Fixes: d34100184685 ("net: hns3: refactor the mailbox message between PF and VF")
Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 07aa6ada4fdb..65d78ee4d65a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -590,9 +590,17 @@ static void hclge_get_queue_id_in_pf(struct hclge_vport *vport,
 				     struct hclge_mbx_vf_to_pf_cmd *mbx_req,
 				     struct hclge_respond_to_vf_msg *resp_msg)
 {
+	struct hnae3_handle *handle = &vport->nic;
+	struct hclge_dev *hdev = vport->back;
 	u16 queue_id, qid_in_pf;
 
 	memcpy(&queue_id, mbx_req->msg.data, sizeof(queue_id));
+	if (queue_id >= handle->kinfo.num_tqps) {
+		dev_err(&hdev->pdev->dev, "Invalid queue id(%u) from VF %u\n",
+			queue_id, mbx_req->mbx_src_vfid);
+		return;
+	}
+
 	qid_in_pf = hclge_covert_handle_qid_global(&vport->nic, queue_id);
 	memcpy(resp_msg->data, &qid_in_pf, sizeof(qid_in_pf));
 	resp_msg->len = sizeof(qid_in_pf);
-- 
cgit v1.2.3


From ef39d632608e66f428c1246836fd060cf4818d67 Mon Sep 17 00:00:00 2001
From: liaoguojia <liaoguojia@huawei.com>
Date: Wed, 15 Sep 2021 21:52:10 +0800
Subject: net: hns3: check vlan id before using it

The input parameters may not be reliable, so check the vlan id before
using it, otherwise a wrong vlan id may be set into hardware.

Fixes: dc8131d846d4 ("net: hns3: Fix for packet loss due wrong filter config in VLAN tbls")
Signed-off-by: liaoguojia <liaoguojia@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index afc88a41a89c..14e9daf09f8c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -9817,6 +9817,9 @@ static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto,
 	if (is_kill && !vlan_id)
 		return 0;
 
+	if (vlan_id >= VLAN_N_VID)
+		return -EINVAL;
+
 	ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-- 
cgit v1.2.3


From 5126b9d3d4acdebc12b9d436282f88d8a1b5146c Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Wed, 15 Sep 2021 21:52:11 +0800
Subject: net: hns3: fix a return value error in hclge_get_reset_status()

hclge_get_reset_status() should return the tqp reset status.
However, if the CMDQ fails, the caller will mistake the error code
for a successful tqp reset status. Therefore, use an output parameter
to return the tqp reset status instead.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 14e9daf09f8c..47fea8985861 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -10726,7 +10726,8 @@ static int hclge_reset_tqp_cmd_send(struct hclge_dev *hdev, u16 queue_id,
 	return 0;
 }
 
-static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
+static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id,
+				  u8 *reset_status)
 {
 	struct hclge_reset_tqp_queue_cmd *req;
 	struct hclge_desc desc;
@@ -10744,7 +10745,9 @@ static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
 		return ret;
 	}
 
-	return hnae3_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
+	*reset_status = hnae3_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
+
+	return 0;
 }
 
 u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id)
@@ -10763,7 +10766,7 @@ static int hclge_reset_tqp_cmd(struct hnae3_handle *handle)
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 	u16 reset_try_times = 0;
-	int reset_status;
+	u8 reset_status;
 	u16 queue_gid;
 	int ret;
 	u16 i;
@@ -10779,7 +10782,11 @@ static int hclge_reset_tqp_cmd(struct hnae3_handle *handle)
 		}
 
 		while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
-			reset_status = hclge_get_reset_status(hdev, queue_gid);
+			ret = hclge_get_reset_status(hdev, queue_gid,
+						     &reset_status);
+			if (ret)
+				return ret;
+
 			if (reset_status)
 				break;
 
-- 
cgit v1.2.3


From 6c90731980655280ea07ce4b21eb97457bf86286 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 20 Sep 2021 21:18:14 +0200
Subject: net/smc: add missing error check in smc_clc_prfx_set()

Coverity stumbled over a missing error check in smc_clc_prfx_set():

*** CID 1475954:  Error handling issues  (CHECKED_RETURN)
/net/smc/smc_clc.c: 233 in smc_clc_prfx_set()
>>>     CID 1475954:  Error handling issues  (CHECKED_RETURN)
>>>     Calling "kernel_getsockname" without checking return value (as is done elsewhere 8 out of 10 times).
233     	kernel_getsockname(clcsock, (struct sockaddr *)&addrs);

Add the return code check in smc_clc_prfx_set().

Fixes: c246d942eabc ("net/smc: restructure netinfo for CLC proposal msgs")
Reported-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_clc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index e286dafd6e88..6ec1ebe878ae 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -230,7 +230,8 @@ static int smc_clc_prfx_set(struct socket *clcsock,
 		goto out_rel;
 	}
 	/* get address to which the internal TCP socket is bound */
-	kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+	if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0)
+		goto out_rel;
 	/* analyze IP specific data of net_device belonging to TCP socket */
 	addr6 = (struct sockaddr_in6 *)&addrs;
 	rcu_read_lock();
-- 
cgit v1.2.3


From a18cee4791b1123d0a6579a7c89f4b87e48abe03 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 20 Sep 2021 21:18:15 +0200
Subject: net/smc: fix 'workqueue leaked lock' in smc_conn_abort_work

The abort_work is scheduled when a connection was detected to be
out-of-sync after a link failure. The work calls smc_conn_kill(),
which calls smc_close_active_abort() and that might end up calling
smc_close_cancel_work().
smc_close_cancel_work() cancels any pending close_work and tx_work but
needs to release the sock_lock before and acquires the sock_lock again
afterwards. So when the sock_lock was NOT acquired before then it may
be held after the abort_work completes. That's why the sock_lock is
acquired before the call to smc_conn_kill() in __smc_lgr_terminate(),
but this is missing in smc_conn_abort_work().

Fix that by acquiring the sock_lock first and release it after the
call to smc_conn_kill().

Fixes: b286a0651e44 ("net/smc: handle incoming CDC validation message")
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index af227b65669e..8280c938be80 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1474,7 +1474,9 @@ static void smc_conn_abort_work(struct work_struct *work)
 						   abort_work);
 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
 
+	lock_sock(&smc->sk);
 	smc_conn_kill(conn, true);
+	release_sock(&smc->sk);
 	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
 }
 
-- 
cgit v1.2.3


From e5845aa0eadda3d8a950eb8845c1396827131f30 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 21 Sep 2021 01:49:18 +0300
Subject: net: dsa: fix dsa_tree_setup error path

Since the blamed commit, dsa_tree_teardown_switches() was split into two
smaller functions, dsa_tree_teardown_switches and dsa_tree_teardown_ports.

However, the error path of dsa_tree_setup stopped calling dsa_tree_teardown_ports.

Fixes: a57d8c217aad ("net: dsa: flush switchdev workqueue before tearing down CPU/DSA ports")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index f14897d9b31d..f54639a3a822 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -1089,6 +1089,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
 teardown_master:
 	dsa_tree_teardown_master(dst);
 teardown_switches:
+	dsa_tree_teardown_ports(dst);
 	dsa_tree_teardown_switches(dst);
 teardown_cpu_ports:
 	dsa_tree_teardown_cpu_ports(dst);
-- 
cgit v1.2.3


From 3e95cfa24e24fbd7fb7675ab972a5aa507c7a89c Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Tue, 21 Sep 2021 15:41:23 +0900
Subject: Doc: networking: Fox a typo in ice.rst

This patch fixes a spelling typo in ice.rst

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/ethernet/intel/ice.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
index e7d9cbff771b..67b7a701ce9e 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
@@ -851,7 +851,7 @@ NOTES:
 - 0x88A8 traffic will not be received unless VLAN stripping is disabled with
   the following command::
 
-    # ethool -K <ethX> rxvlan off
+    # ethtool -K <ethX> rxvlan off
 
 - 0x88A8/0x8100 double VLANs cannot be used with 0x8100 or 0x8100/0x8100 VLANS
   configured on the same port. 0x88a8/0x8100 traffic will not be received if
-- 
cgit v1.2.3


From 5135e96a3dd2f4555ae6981c3155a62bcf3227f6 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 21 Sep 2021 00:42:08 +0300
Subject: net: dsa: don't allocate the slave_mii_bus using devres

The Linux device model permits both the ->shutdown and ->remove driver
methods to get called during a shutdown procedure. Example: a DSA switch
which sits on an SPI bus, and the SPI bus driver calls this on its
->shutdown method:

spi_unregister_controller
-> device_for_each_child(&ctlr->dev, NULL, __unregister);
   -> spi_unregister_device(to_spi_device(dev));
      -> device_del(&spi->dev);

So this is a simple pattern which can theoretically appear on any bus,
although the only other buses on which I've been able to find it are
I2C:

i2c_del_adapter
-> device_for_each_child(&adap->dev, NULL, __unregister_client);
   -> i2c_unregister_device(client);
      -> device_unregister(&client->dev);

The implication of this pattern is that devices on these buses can be
unregistered after having been shut down. The drivers for these devices
might choose to return early either from ->remove or ->shutdown if the
other callback has already run once, and they might choose that the
->shutdown method should only perform a subset of the teardown done by
->remove (to avoid unnecessary delays when rebooting).

So in other words, the device driver may choose on ->remove to not
do anything (therefore to not unregister an MDIO bus it has registered
on ->probe), because this ->remove is actually triggered by the
device_shutdown path, and its ->shutdown method has already run and done
the minimally required cleanup.

This used to be fine until the blamed commit, but now, the following
BUG_ON triggers:

void mdiobus_free(struct mii_bus *bus)
{
	/* For compatibility with error handling in drivers. */
	if (bus->state == MDIOBUS_ALLOCATED) {
		kfree(bus);
		return;
	}

	BUG_ON(bus->state != MDIOBUS_UNREGISTERED);
	bus->state = MDIOBUS_RELEASED;

	put_device(&bus->dev);
}

In other words, there is an attempt to free an MDIO bus which was not
unregistered. The attempt to free it comes from the devres release
callbacks of the SPI device, which are executed after the device is
unregistered.

I'm not saying that the fact that MDIO buses allocated using devres
would automatically get unregistered wasn't strange. I'm just saying
that the commit didn't care about auditing existing call paths in the
kernel, and now, the following code sequences are potentially buggy:

(a) devm_mdiobus_alloc followed by plain mdiobus_register, for a device
    located on a bus that unregisters its children on shutdown. After
    the blamed patch, either both the alloc and the register should use
    devres, or none should.

(b) devm_mdiobus_alloc followed by plain mdiobus_register, and then no
    mdiobus_unregister at all in the remove path. After the blamed
    patch, nobody unregisters the MDIO bus anymore, so this is even more
    buggy than the previous case which needs a specific bus
    configuration to be seen, this one is an unconditional bug.

In this case, DSA falls into category (a), it tries to be helpful and
registers an MDIO bus on behalf of the switch, which might be on such a
bus. I've no idea why it does it under devres.

It does this on probe:

	if (!ds->slave_mii_bus && ds->ops->phy_read)
		alloc and register mdio bus

and this on remove:

	if (ds->slave_mii_bus && ds->ops->phy_read)
		unregister mdio bus

I _could_ imagine using devres because the condition used on remove is
different than the condition used on probe. So strictly speaking, DSA
cannot determine whether the ds->slave_mii_bus it sees on remove is the
ds->slave_mii_bus that _it_ has allocated on probe. Using devres would
have solved that problem. But nonetheless, the existing code already
proceeds to unregister the MDIO bus, even though it might be
unregistering an MDIO bus it has never registered. So I can only guess
that no driver that implements ds->ops->phy_read also allocates and
registers ds->slave_mii_bus itself.

So in that case, if unregistering is fine, freeing must be fine too.

Stop using devres and free the MDIO bus manually. This will make devres
stop attempting to free a still registered MDIO bus on ->shutdown.

Fixes: ac3a68d56651 ("net: phy: don't abuse devres in devm_mdiobus_register()")
Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa2.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index f54639a3a822..b29262eee00b 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -880,7 +880,7 @@ static int dsa_switch_setup(struct dsa_switch *ds)
 	devlink_params_publish(ds->devlink);
 
 	if (!ds->slave_mii_bus && ds->ops->phy_read) {
-		ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
+		ds->slave_mii_bus = mdiobus_alloc();
 		if (!ds->slave_mii_bus) {
 			err = -ENOMEM;
 			goto teardown;
@@ -890,13 +890,16 @@ static int dsa_switch_setup(struct dsa_switch *ds)
 
 		err = mdiobus_register(ds->slave_mii_bus);
 		if (err < 0)
-			goto teardown;
+			goto free_slave_mii_bus;
 	}
 
 	ds->setup = true;
 
 	return 0;
 
+free_slave_mii_bus:
+	if (ds->slave_mii_bus && ds->ops->phy_read)
+		mdiobus_free(ds->slave_mii_bus);
 teardown:
 	if (ds->ops->teardown)
 		ds->ops->teardown(ds);
@@ -921,8 +924,11 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
 	if (!ds->setup)
 		return;
 
-	if (ds->slave_mii_bus && ds->ops->phy_read)
+	if (ds->slave_mii_bus && ds->ops->phy_read) {
 		mdiobus_unregister(ds->slave_mii_bus);
+		mdiobus_free(ds->slave_mii_bus);
+		ds->slave_mii_bus = NULL;
+	}
 
 	dsa_switch_unregister_notifier(ds);
 
-- 
cgit v1.2.3


From 74b6d7d13307b016f4b5bba8198297824c0ee6df Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 21 Sep 2021 00:42:09 +0300
Subject: net: dsa: realtek: register the MDIO bus under devres
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Linux device model permits both the ->shutdown and ->remove driver
methods to get called during a shutdown procedure. Example: a DSA switch
which sits on an SPI bus, and the SPI bus driver calls this on its
->shutdown method:

spi_unregister_controller
-> device_for_each_child(&ctlr->dev, NULL, __unregister);
   -> spi_unregister_device(to_spi_device(dev));
      -> device_del(&spi->dev);

So this is a simple pattern which can theoretically appear on any bus,
although the only other buses on which I've been able to find it are
I2C:

i2c_del_adapter
-> device_for_each_child(&adap->dev, NULL, __unregister_client);
   -> i2c_unregister_device(client);
      -> device_unregister(&client->dev);

The implication of this pattern is that devices on these buses can be
unregistered after having been shut down. The drivers for these devices
might choose to return early either from ->remove or ->shutdown if the
other callback has already run once, and they might choose that the
->shutdown method should only perform a subset of the teardown done by
->remove (to avoid unnecessary delays when rebooting).

So in other words, the device driver may choose on ->remove to not
do anything (therefore to not unregister an MDIO bus it has registered
on ->probe), because this ->remove is actually triggered by the
device_shutdown path, and its ->shutdown method has already run and done
the minimally required cleanup.

This used to be fine until the blamed commit, but now, the following
BUG_ON triggers:

void mdiobus_free(struct mii_bus *bus)
{
	/* For compatibility with error handling in drivers. */
	if (bus->state == MDIOBUS_ALLOCATED) {
		kfree(bus);
		return;
	}

	BUG_ON(bus->state != MDIOBUS_UNREGISTERED);
	bus->state = MDIOBUS_RELEASED;

	put_device(&bus->dev);
}

In other words, there is an attempt to free an MDIO bus which was not
unregistered. The attempt to free it comes from the devres release
callbacks of the SPI device, which are executed after the device is
unregistered.

I'm not saying that the fact that MDIO buses allocated using devres
would automatically get unregistered wasn't strange. I'm just saying
that the commit didn't care about auditing existing call paths in the
kernel, and now, the following code sequences are potentially buggy:

(a) devm_mdiobus_alloc followed by plain mdiobus_register, for a device
    located on a bus that unregisters its children on shutdown. After
    the blamed patch, either both the alloc and the register should use
    devres, or none should.

(b) devm_mdiobus_alloc followed by plain mdiobus_register, and then no
    mdiobus_unregister at all in the remove path. After the blamed
    patch, nobody unregisters the MDIO bus anymore, so this is even more
    buggy than the previous case which needs a specific bus
    configuration to be seen, this one is an unconditional bug.

In this case, the Realtek drivers fall under category (b). To solve it,
we can register the MDIO bus under devres too, which restores the
previous behavior.

Fixes: ac3a68d56651 ("net: phy: don't abuse devres in devm_mdiobus_register()")
Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Reported-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/realtek-smi-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/realtek-smi-core.c b/drivers/net/dsa/realtek-smi-core.c
index dd2f0d6208b3..2fcfd917b876 100644
--- a/drivers/net/dsa/realtek-smi-core.c
+++ b/drivers/net/dsa/realtek-smi-core.c
@@ -368,7 +368,7 @@ int realtek_smi_setup_mdio(struct realtek_smi *smi)
 	smi->slave_mii_bus->parent = smi->dev;
 	smi->ds->slave_mii_bus = smi->slave_mii_bus;
 
-	ret = of_mdiobus_register(smi->slave_mii_bus, mdio_np);
+	ret = devm_of_mdiobus_register(smi->dev, smi->slave_mii_bus, mdio_np);
 	if (ret) {
 		dev_err(smi->dev, "unable to register MDIO bus %s\n",
 			smi->slave_mii_bus->id);
-- 
cgit v1.2.3


From 248f064af222a1f97ee02c84a98013dfbccad386 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Tue, 21 Sep 2021 16:52:15 +0200
Subject: s390/qeth: fix NULL deref in qeth_clear_working_pool_list()

When qeth_set_online() calls qeth_clear_working_pool_list() to roll
back after an error exit from qeth_hardsetup_card(), we are at risk of
accessing card->qdio.in_q before it was allocated by
qeth_alloc_qdio_queues() via qeth_mpc_initialize().

qeth_clear_working_pool_list() then dereferences NULL, and by writing to
queue->bufs[i].pool_entry scribbles all over the CPU's lowcore.
Resulting in a crash when those lowcore areas are used next (eg. on
the next machine-check interrupt).

Such a scenario would typically happen when the device is first set
online and its queues aren't allocated yet. An early IO error or certain
misconfigs (eg. mismatched transport mode, bad portno) then cause us to
error out from qeth_hardsetup_card() with card->qdio.in_q still being
NULL.

Fix it by checking the pointer for NULL before accessing it.

Note that we also have (rare) paths inside qeth_mpc_initialize() where
a configuration change can cause us to free the existing queues,
expecting that subsequent code will allocate them again. If we then
error out before that re-allocation happens, the same bug occurs.

Fixes: eff73e16ee11 ("s390/qeth: tolerate pre-filled RX buffer")
Reported-by: Stefan Raspl <raspl@linux.ibm.com>
Root-caused-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Reviewed-by: Alexandra Winter <wintera@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/s390/net/qeth_core_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 41ca6273b750..3fba440a0731 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -202,6 +202,9 @@ static void qeth_clear_working_pool_list(struct qeth_card *card)
 				 &card->qdio.in_buf_pool.entry_list, list)
 		list_del(&pool_entry->list);
 
+	if (!queue)
+		return;
+
 	for (i = 0; i < ARRAY_SIZE(queue->bufs); i++)
 		queue->bufs[i].pool_entry = NULL;
 }
-- 
cgit v1.2.3


From ee909d0b1dac8632eeb78cbf17661d6c7674bbd0 Mon Sep 17 00:00:00 2001
From: Alexandra Winter <wintera@linux.ibm.com>
Date: Tue, 21 Sep 2021 16:52:16 +0200
Subject: s390/qeth: Fix deadlock in remove_discipline

Problem: qeth_close_dev_handler is a worker that tries to acquire
card->discipline_mutex via drv->set_offline() in ccwgroup_set_offline().
Since commit b41b554c1ee7
("s390/qeth: fix locking for discipline setup / removal")
qeth_remove_discipline() is called under card->discipline_mutex and
cancels the work and waits for it to finish.

STOPLAN reception with reason code IPA_RC_VEPA_TO_VEB_TRANSITION is the
only situation that schedules close_dev_work. In that situation scheduling
qeth recovery will also result in an offline interface, when resetting the
isolation mode fails, if the external switch is still set to VEB.
And since commit 0b9902c1fcc5 ("s390/qeth: fix deadlock during recovery")
qeth recovery does not acquire card->discipline_mutex anymore.

So we accept the longer pathlength of qeth_schedule_recovery in this
error situation and re-use the existing function.

As a side-benefit this changes the hwtrap to behave like during recovery
instead of like during a user-triggered set_offline.

Fixes: b41b554c1ee7 ("s390/qeth: fix locking for discipline setup / removal")
Signed-off-by: Alexandra Winter <wintera@linux.ibm.com>
Acked-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/s390/net/qeth_core.h      |  1 -
 drivers/s390/net/qeth_core_main.c | 16 ++++------------
 drivers/s390/net/qeth_l2_main.c   |  1 -
 drivers/s390/net/qeth_l3_main.c   |  1 -
 4 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 535a60b3946d..a5aa0bdc61d6 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -858,7 +858,6 @@ struct qeth_card {
 	struct napi_struct napi;
 	struct qeth_rx rx;
 	struct delayed_work buffer_reclaim_work;
-	struct work_struct close_dev_work;
 };
 
 static inline bool qeth_card_hw_is_reachable(struct qeth_card *card)
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 3fba440a0731..9f26706051e5 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -70,15 +70,6 @@ static void qeth_issue_next_read_cb(struct qeth_card *card,
 static int qeth_qdio_establish(struct qeth_card *);
 static void qeth_free_qdio_queues(struct qeth_card *card);
 
-static void qeth_close_dev_handler(struct work_struct *work)
-{
-	struct qeth_card *card;
-
-	card = container_of(work, struct qeth_card, close_dev_work);
-	QETH_CARD_TEXT(card, 2, "cldevhdl");
-	ccwgroup_set_offline(card->gdev);
-}
-
 static const char *qeth_get_cardname(struct qeth_card *card)
 {
 	if (IS_VM_NIC(card)) {
@@ -795,10 +786,12 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card,
 	case IPA_CMD_STOPLAN:
 		if (cmd->hdr.return_code == IPA_RC_VEPA_TO_VEB_TRANSITION) {
 			dev_err(&card->gdev->dev,
-				"Interface %s is down because the adjacent port is no longer in reflective relay mode\n",
+				"Adjacent port of interface %s is no longer in reflective relay mode, trigger recovery\n",
 				netdev_name(card->dev));
-			schedule_work(&card->close_dev_work);
+			/* Set offline, then probably fail to set online: */
+			qeth_schedule_recovery(card);
 		} else {
+			/* stay online for subsequent STARTLAN */
 			dev_warn(&card->gdev->dev,
 				 "The link for interface %s on CHPID 0x%X failed\n",
 				 netdev_name(card->dev), card->info.chpid);
@@ -1540,7 +1533,6 @@ static void qeth_setup_card(struct qeth_card *card)
 	INIT_LIST_HEAD(&card->ipato.entries);
 	qeth_init_qdio_info(card);
 	INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work);
-	INIT_WORK(&card->close_dev_work, qeth_close_dev_handler);
 	hash_init(card->rx_mode_addrs);
 	hash_init(card->local_addrs4);
 	hash_init(card->local_addrs6);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 72e84ff9fea5..dc6c00768d91 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -2307,7 +2307,6 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
 	if (gdev->state == CCWGROUP_ONLINE)
 		qeth_set_offline(card, card->discipline, false);
 
-	cancel_work_sync(&card->close_dev_work);
 	if (card->dev->reg_state == NETREG_REGISTERED) {
 		priv = netdev_priv(card->dev);
 		if (priv->brport_features & BR_LEARNING_SYNC) {
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 3a523e700a5a..6fd3e288f059 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1969,7 +1969,6 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
 	if (cgdev->state == CCWGROUP_ONLINE)
 		qeth_set_offline(card, card->discipline, false);
 
-	cancel_work_sync(&card->close_dev_work);
 	if (card->dev->reg_state == NETREG_REGISTERED)
 		unregister_netdev(card->dev);
 
-- 
cgit v1.2.3


From d2b59bd4b06d84a4eadb520b0f71c62fe8ec0a62 Mon Sep 17 00:00:00 2001
From: Alexandra Winter <wintera@linux.ibm.com>
Date: Tue, 21 Sep 2021 16:52:17 +0200
Subject: s390/qeth: fix deadlock during failing recovery

Commit 0b9902c1fcc5 ("s390/qeth: fix deadlock during recovery") removed
taking discipline_mutex inside qeth_do_reset(), fixing potential
deadlocks. An error path was missed though, that still takes
discipline_mutex and thus has the original deadlock potential.

Intermittent deadlocks were seen when a qeth channel path is configured
offline, causing a race between qeth_do_reset and ccwgroup_remove.
Call qeth_set_offline() directly in the qeth_do_reset() error case and
then a new variant of ccwgroup_set_offline(), without taking
discipline_mutex.

Fixes: b41b554c1ee7 ("s390/qeth: fix locking for discipline setup / removal")
Signed-off-by: Alexandra Winter <wintera@linux.ibm.com>
Reviewed-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 arch/s390/include/asm/ccwgroup.h  |  2 +-
 drivers/s390/cio/ccwgroup.c       | 10 ++++++++--
 drivers/s390/net/qeth_core_main.c |  3 ++-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index 36dbf5043fc0..aa995d91cd1d 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -55,7 +55,7 @@ int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv,
 			int num_devices, const char *buf);
 
 extern int ccwgroup_set_online(struct ccwgroup_device *gdev);
-extern int ccwgroup_set_offline(struct ccwgroup_device *gdev);
+int ccwgroup_set_offline(struct ccwgroup_device *gdev, bool call_gdrv);
 
 extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev);
 extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev);
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index 2ec741106cb6..f0538609dfe4 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -77,12 +77,13 @@ EXPORT_SYMBOL(ccwgroup_set_online);
 /**
  * ccwgroup_set_offline() - disable a ccwgroup device
  * @gdev: target ccwgroup device
+ * @call_gdrv: Call the registered gdrv set_offline function
  *
  * This function attempts to put the ccwgroup device into the offline state.
  * Returns:
  *  %0 on success and a negative error value on failure.
  */
-int ccwgroup_set_offline(struct ccwgroup_device *gdev)
+int ccwgroup_set_offline(struct ccwgroup_device *gdev, bool call_gdrv)
 {
 	struct ccwgroup_driver *gdrv = to_ccwgroupdrv(gdev->dev.driver);
 	int ret = -EINVAL;
@@ -91,11 +92,16 @@ int ccwgroup_set_offline(struct ccwgroup_device *gdev)
 		return -EAGAIN;
 	if (gdev->state == CCWGROUP_OFFLINE)
 		goto out;
+	if (!call_gdrv) {
+		ret = 0;
+		goto offline;
+	}
 	if (gdrv->set_offline)
 		ret = gdrv->set_offline(gdev);
 	if (ret)
 		goto out;
 
+offline:
 	gdev->state = CCWGROUP_OFFLINE;
 out:
 	atomic_set(&gdev->onoff, 0);
@@ -124,7 +130,7 @@ static ssize_t ccwgroup_online_store(struct device *dev,
 	if (value == 1)
 		ret = ccwgroup_set_online(gdev);
 	else if (value == 0)
-		ret = ccwgroup_set_offline(gdev);
+		ret = ccwgroup_set_offline(gdev, true);
 	else
 		ret = -EINVAL;
 out:
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 9f26706051e5..e9807d2996a9 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5514,7 +5514,8 @@ static int qeth_do_reset(void *data)
 		dev_info(&card->gdev->dev,
 			 "Device successfully recovered!\n");
 	} else {
-		ccwgroup_set_offline(card->gdev);
+		qeth_set_offline(card, disc, true);
+		ccwgroup_set_offline(card->gdev, false);
 		dev_warn(&card->gdev->dev,
 			 "The qeth device driver failed to recover an error on the device\n");
 	}
-- 
cgit v1.2.3


From 1ea7812326004afd2803cc968a4776ae5120a597 Mon Sep 17 00:00:00 2001
From: Shai Malin <smalin@marvell.com>
Date: Wed, 22 Sep 2021 13:53:26 +0300
Subject: qed: rdma - don't wait for resources under hw error recovery flow

If the HW device is in recovery, the HW resources will never return,
hence we shouldn't wait for the CID (HW context ID) bitmaps to clear.
This fix speeds up the error recovery flow.

Fixes: 64515dc899df ("qed: Add infrastructure for error detection and recovery")
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 8 ++++++++
 drivers/net/ethernet/qlogic/qed/qed_roce.c  | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index fc8b3e64f153..186d0048a9d1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -1297,6 +1297,14 @@ qed_iwarp_wait_cid_map_cleared(struct qed_hwfn *p_hwfn, struct qed_bmap *bmap)
 	prev_weight = weight;
 
 	while (weight) {
+		/* If the HW device is during recovery, all resources are
+		 * immediately reset without receiving a per-cid indication
+		 * from HW. In this case we don't expect the cid_map to be
+		 * cleared.
+		 */
+		if (p_hwfn->cdev->recov_in_prog)
+			return 0;
+
 		msleep(QED_IWARP_MAX_CID_CLEAN_TIME);
 
 		weight = bitmap_weight(bmap->bitmap, bmap->max_count);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index f16a157bb95a..cf5baa5e59bc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -77,6 +77,14 @@ void qed_roce_stop(struct qed_hwfn *p_hwfn)
 	 * Beyond the added delay we clear the bitmap anyway.
 	 */
 	while (bitmap_weight(rcid_map->bitmap, rcid_map->max_count)) {
+		/* If the HW device is during recovery, all resources are
+		 * immediately reset without receiving a per-cid indication
+		 * from HW. In this case we don't expect the cid bitmap to be
+		 * cleared.
+		 */
+		if (p_hwfn->cdev->recov_in_prog)
+			return;
+
 		msleep(100);
 		if (wait_count++ > 20) {
 			DP_NOTICE(p_hwfn, "cid bitmap wait timed out\n");
-- 
cgit v1.2.3


From 977d293e23b48a1129830d7968605f61c4af71a0 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 22 Sep 2021 13:12:17 +0200
Subject: mptcp: ensure tx skbs always have the MPTCP ext

Due to signed/unsigned comparison, the expression:

	info->size_goal - skb->len > 0

evaluates to true when the size goal is smaller than the
skb size. That results in lack of tx cache refill, so that
the skb allocated by the core TCP code lacks the required
MPTCP skb extensions.

Due to the above, syzbot is able to trigger the following WARN_ON():

WARNING: CPU: 1 PID: 810 at net/mptcp/protocol.c:1366 mptcp_sendmsg_frag+0x1362/0x1bc0 net/mptcp/protocol.c:1366
Modules linked in:
CPU: 1 PID: 810 Comm: syz-executor.4 Not tainted 5.14.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:mptcp_sendmsg_frag+0x1362/0x1bc0 net/mptcp/protocol.c:1366
Code: ff 4c 8b 74 24 50 48 8b 5c 24 58 e9 0f fb ff ff e8 13 44 8b f8 4c 89 e7 45 31 ed e8 98 57 2e fe e9 81 f4 ff ff e8 fe 43 8b f8 <0f> 0b 41 bd ea ff ff ff e9 6f f4 ff ff 4c 89 e7 e8 b9 8e d2 f8 e9
RSP: 0018:ffffc9000531f6a0 EFLAGS: 00010216
RAX: 000000000000697f RBX: 0000000000000000 RCX: ffffc90012107000
RDX: 0000000000040000 RSI: ffffffff88eac9e2 RDI: 0000000000000003
RBP: ffff888078b15780 R08: 0000000000000000 R09: 0000000000000000
R10: ffffffff88eac017 R11: 0000000000000000 R12: ffff88801de0a280
R13: 0000000000006b58 R14: ffff888066278280 R15: ffff88803c2fe9c0
FS:  00007fd9f866e700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007faebcb2f718 CR3: 00000000267cb000 CR4: 00000000001506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 __mptcp_push_pending+0x1fb/0x6b0 net/mptcp/protocol.c:1547
 mptcp_release_cb+0xfe/0x210 net/mptcp/protocol.c:3003
 release_sock+0xb4/0x1b0 net/core/sock.c:3206
 sk_stream_wait_memory+0x604/0xed0 net/core/stream.c:145
 mptcp_sendmsg+0xc39/0x1bc0 net/mptcp/protocol.c:1749
 inet6_sendmsg+0x99/0xe0 net/ipv6/af_inet6.c:643
 sock_sendmsg_nosec net/socket.c:704 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:724
 sock_write_iter+0x2a0/0x3e0 net/socket.c:1057
 call_write_iter include/linux/fs.h:2163 [inline]
 new_sync_write+0x40b/0x640 fs/read_write.c:507
 vfs_write+0x7cf/0xae0 fs/read_write.c:594
 ksys_write+0x1ee/0x250 fs/read_write.c:647
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x4665f9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fd9f866e188 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 000000000056c038 RCX: 00000000004665f9
RDX: 00000000000e7b78 RSI: 0000000020000000 RDI: 0000000000000003
RBP: 00000000004bfcc4 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000056c038
R13: 0000000000a9fb1f R14: 00007fd9f866e300 R15: 0000000000022000

Fix the issue by rewriting the relevant expression to avoid
sign-related problems - note: size_goal is always >= 0.

Additionally, ensure that the skb in the tx cache always carries
the relevant extension.

Reported-and-tested-by: syzbot+263a248eec3e875baa7b@syzkaller.appspotmail.com
Fixes: 1094c6fe7280 ("mptcp: fix possible divide by zero")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2602f1386160..dbcebf56798f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1316,7 +1316,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 			goto alloc_skb;
 		}
 
-		must_collapse = (info->size_goal - skb->len > 0) &&
+		must_collapse = (info->size_goal > skb->len) &&
 				(skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags);
 		if (must_collapse) {
 			size_bias = skb->len;
@@ -1325,7 +1325,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	}
 
 alloc_skb:
-	if (!must_collapse && !ssk->sk_tx_skb_cache &&
+	if (!must_collapse &&
 	    !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held))
 		return 0;
 
-- 
cgit v1.2.3


From 3106a0847525befe3e22fc723909d1b21eb0d520 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 22 Sep 2021 13:25:40 +0300
Subject: nexthop: Fix memory leaks in nexthop notification chain listeners

syzkaller discovered memory leaks [1] that can be reduced to the
following commands:

 # ip nexthop add id 1 blackhole
 # devlink dev reload pci/0000:06:00.0

As part of the reload flow, mlxsw will unregister its netdevs and then
unregister from the nexthop notification chain. Before unregistering
from the notification chain, mlxsw will receive delete notifications for
nexthop objects using netdevs registered by mlxsw or their uppers. mlxsw
will not receive notifications for nexthops using netdevs that are not
dismantled as part of the reload flow. For example, the blackhole
nexthop above that internally uses the loopback netdev as its nexthop
device.

One way to fix this problem is to have listeners flush their nexthop
tables after unregistering from the notification chain. This is
error-prone as evidenced by this patch and also not symmetric with the
registration path where a listener receives a dump of all the existing
nexthops.

Therefore, fix this problem by replaying delete notifications for the
listener being unregistered. This is symmetric to the registration path
and also consistent with the netdev notification chain.

The above means that unregister_nexthop_notifier(), like
register_nexthop_notifier(), will have to take RTNL in order to iterate
over the existing nexthops and that any callers of the function cannot
hold RTNL. This is true for mlxsw and netdevsim, but not for the VXLAN
driver. To avoid a deadlock, change the latter to unregister its nexthop
listener without holding RTNL, making it symmetric to the registration
path.

[1]
unreferenced object 0xffff88806173d600 (size 512):
  comm "syz-executor.0", pid 1290, jiffies 4295583142 (age 143.507s)
  hex dump (first 32 bytes):
    41 9d 1e 60 80 88 ff ff 08 d6 73 61 80 88 ff ff  A..`......sa....
    08 d6 73 61 80 88 ff ff 01 00 00 00 00 00 00 00  ..sa............
  backtrace:
    [<ffffffff81a6b576>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline]
    [<ffffffff81a6b576>] slab_post_alloc_hook+0x96/0x490 mm/slab.h:522
    [<ffffffff81a716d3>] slab_alloc_node mm/slub.c:3206 [inline]
    [<ffffffff81a716d3>] slab_alloc mm/slub.c:3214 [inline]
    [<ffffffff81a716d3>] kmem_cache_alloc_trace+0x163/0x370 mm/slub.c:3231
    [<ffffffff82e8681a>] kmalloc include/linux/slab.h:591 [inline]
    [<ffffffff82e8681a>] kzalloc include/linux/slab.h:721 [inline]
    [<ffffffff82e8681a>] mlxsw_sp_nexthop_obj_group_create drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:4918 [inline]
    [<ffffffff82e8681a>] mlxsw_sp_nexthop_obj_new drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:5054 [inline]
    [<ffffffff82e8681a>] mlxsw_sp_nexthop_obj_event+0x59a/0x2910 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:5239
    [<ffffffff813ef67d>] notifier_call_chain+0xbd/0x210 kernel/notifier.c:83
    [<ffffffff813f0662>] blocking_notifier_call_chain kernel/notifier.c:318 [inline]
    [<ffffffff813f0662>] blocking_notifier_call_chain+0x72/0xa0 kernel/notifier.c:306
    [<ffffffff8384b9c6>] call_nexthop_notifiers+0x156/0x310 net/ipv4/nexthop.c:244
    [<ffffffff83852bd8>] insert_nexthop net/ipv4/nexthop.c:2336 [inline]
    [<ffffffff83852bd8>] nexthop_add net/ipv4/nexthop.c:2644 [inline]
    [<ffffffff83852bd8>] rtm_new_nexthop+0x14e8/0x4d10 net/ipv4/nexthop.c:2913
    [<ffffffff833e9a78>] rtnetlink_rcv_msg+0x448/0xbf0 net/core/rtnetlink.c:5572
    [<ffffffff83608703>] netlink_rcv_skb+0x173/0x480 net/netlink/af_netlink.c:2504
    [<ffffffff833de032>] rtnetlink_rcv+0x22/0x30 net/core/rtnetlink.c:5590
    [<ffffffff836069de>] netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline]
    [<ffffffff836069de>] netlink_unicast+0x5ae/0x7f0 net/netlink/af_netlink.c:1340
    [<ffffffff83607501>] netlink_sendmsg+0x8e1/0xe30 net/netlink/af_netlink.c:1929
    [<ffffffff832fde84>] sock_sendmsg_nosec net/socket.c:704 [inline]
    [<ffffffff832fde84>] sock_sendmsg net/socket.c:724 [inline]
    [<ffffffff832fde84>] ____sys_sendmsg+0x874/0x9f0 net/socket.c:2409
    [<ffffffff83304a44>] ___sys_sendmsg+0x104/0x170 net/socket.c:2463
    [<ffffffff83304c01>] __sys_sendmsg+0x111/0x1f0 net/socket.c:2492
    [<ffffffff83304d5d>] __do_sys_sendmsg net/socket.c:2501 [inline]
    [<ffffffff83304d5d>] __se_sys_sendmsg net/socket.c:2499 [inline]
    [<ffffffff83304d5d>] __x64_sys_sendmsg+0x7d/0xc0 net/socket.c:2499

Fixes: 2a014b200bbd ("mlxsw: spectrum_router: Add support for nexthop objects")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c |  2 +-
 net/ipv4/nexthop.c  | 19 ++++++++++++++-----
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 5a8df5a195cb..141635a35c28 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -4756,12 +4756,12 @@ static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
 	LIST_HEAD(list);
 	unsigned int h;
 
-	rtnl_lock();
 	list_for_each_entry(net, net_list, exit_list) {
 		struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 
 		unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);
 	}
+	rtnl_lock();
 	list_for_each_entry(net, net_list, exit_list)
 		vxlan_destroy_tunnels(net, &list);
 
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 0e75fd3e57b4..9e8100728d46 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3567,6 +3567,7 @@ static struct notifier_block nh_netdev_notifier = {
 };
 
 static int nexthops_dump(struct net *net, struct notifier_block *nb,
+			 enum nexthop_event_type event_type,
 			 struct netlink_ext_ack *extack)
 {
 	struct rb_root *root = &net->nexthop.rb_root;
@@ -3577,8 +3578,7 @@ static int nexthops_dump(struct net *net, struct notifier_block *nb,
 		struct nexthop *nh;
 
 		nh = rb_entry(node, struct nexthop, rb_node);
-		err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh,
-					    extack);
+		err = call_nexthop_notifier(nb, net, event_type, nh, extack);
 		if (err)
 			break;
 	}
@@ -3592,7 +3592,7 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
 	int err;
 
 	rtnl_lock();
-	err = nexthops_dump(net, nb, extack);
+	err = nexthops_dump(net, nb, NEXTHOP_EVENT_REPLACE, extack);
 	if (err)
 		goto unlock;
 	err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
@@ -3605,8 +3605,17 @@ EXPORT_SYMBOL(register_nexthop_notifier);
 
 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
 {
-	return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
-						  nb);
+	int err;
+
+	rtnl_lock();
+	err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
+						 nb);
+	if (err)
+		goto unlock;
+	nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
+unlock:
+	rtnl_unlock();
+	return err;
 }
 EXPORT_SYMBOL(unregister_nexthop_notifier);
 
-- 
cgit v1.2.3


From 5b099870c8e0eb026a1560894d94f827832491cf Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Wed, 22 Sep 2021 19:21:29 +0200
Subject: MAINTAINERS: remove Guvenc Gulce as net/smc maintainer

Remove myself as net/smc maintainer, as I am
leaving IBM soon and can not maintain net/smc anymore.

Cc: Julian Wiedmann <jwi@linux.ibm.com>
Acked-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index eeb4c70b3d5b..3c814976443e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16955,7 +16955,6 @@ F:	drivers/misc/sgi-xp/
 
 SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
 M:	Karsten Graul <kgraul@linux.ibm.com>
-M:	Guvenc Gulce <guvenc@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 S:	Supported
 W:	http://www.ibm.com/developerworks/linux/linux390/
-- 
cgit v1.2.3


From 31339440b2d0a4987030aac026adbaba44e22490 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 22 Sep 2021 19:30:37 +0100
Subject: nfc: st-nci: Add SPI ID matching DT compatible

Currently autoloading for SPI devices does not use the DT ID table, it uses
SPI modaliases. Supporting OF modaliases is going to be difficult if not
impractical, an attempt was made but has been reverted, so ensure that
module autoloading works for this driver by adding the part name used in
the compatible to the list of SPI IDs.

Fixes: 96c8395e2166 ("spi: Revert modalias changes")
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/st-nci/spi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c
index a620c34790e6..0875b773fb41 100644
--- a/drivers/nfc/st-nci/spi.c
+++ b/drivers/nfc/st-nci/spi.c
@@ -278,6 +278,7 @@ static int st_nci_spi_remove(struct spi_device *dev)
 
 static struct spi_device_id st_nci_spi_id_table[] = {
 	{ST_NCI_SPI_DRIVER_NAME, 0},
+	{"st21nfcb-spi", 0},
 	{}
 };
 MODULE_DEVICE_TABLE(spi, st_nci_spi_id_table);
-- 
cgit v1.2.3


From e68daf61ed13832aef8892200a874139700ca754 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 22 Sep 2021 16:55:48 -0700
Subject: net: ethernet: mtk_eth_soc: avoid creating duplicate offload entries

Sometimes multiple CLS_REPLACE calls are issued for the same connection.
rhashtable_insert_fast does not check for these duplicates, so multiple
hardware flow entries can be created.
Fix this by checking for an existing entry early.

Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index b5f68f66d42a..7bb1f20002b5 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -186,6 +186,9 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
 	int hash;
 	int i;
 
+	if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
+		return -EEXIST;
+
 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
 		struct flow_match_meta match;
 
-- 
cgit v1.2.3


From acc64f52afac15e9e44d9b5253271346841786e0 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 22 Sep 2021 19:03:38 -0700
Subject: net: mscc: ocelot: fix forwarding from BLOCKING ports remaining
 enabled

The blamed commit made the fatally incorrect assumption that ports which
aren't in the FORWARDING STP state should not have packets forwarded
towards them, and that is all that needs to be done.

However, that logic alone permits BLOCKING ports to forward to
FORWARDING ports, which of course allows packet storms to occur when
there is an L2 loop.

The ocelot_get_bridge_fwd_mask should not only ask "what can the bridge
do for you", but "what can you do for the bridge". This way, only
FORWARDING ports forward to the other FORWARDING ports from the same
bridging domain, and we are still compatible with the idea of multiple
bridges.

Fixes: df291e54ccca ("net: ocelot: support multiple bridges")
Suggested-by: Colin Foster <colin.foster@in-advantage.com>
Reported-by: Colin Foster <colin.foster@in-advantage.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 729ba826ba17..559177e6ded4 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1293,14 +1293,19 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond,
 	return mask;
 }
 
-static u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot,
+static u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot, int src_port,
 				      struct net_device *bridge)
 {
+	struct ocelot_port *ocelot_port = ocelot->ports[src_port];
 	u32 mask = 0;
 	int port;
 
+	if (!ocelot_port || ocelot_port->bridge != bridge ||
+	    ocelot_port->stp_state != BR_STATE_FORWARDING)
+		return 0;
+
 	for (port = 0; port < ocelot->num_phys_ports; port++) {
-		struct ocelot_port *ocelot_port = ocelot->ports[port];
+		ocelot_port = ocelot->ports[port];
 
 		if (!ocelot_port)
 			continue;
@@ -1366,7 +1371,7 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot)
 			struct net_device *bridge = ocelot_port->bridge;
 			struct net_device *bond = ocelot_port->bond;
 
-			mask = ocelot_get_bridge_fwd_mask(ocelot, bridge);
+			mask = ocelot_get_bridge_fwd_mask(ocelot, port, bridge);
 			mask |= cpu_fwd_mask;
 			mask &= ~BIT(port);
 			if (bond) {
-- 
cgit v1.2.3


From fdbccea419dc782079ce5881d2705cc9e3881480 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Thu, 23 Sep 2021 09:51:45 +0300
Subject: net/mlx4_en: Don't allow aRFS for encapsulated packets

Driver doesn't support aRFS for encapsulated packets, return early error
in such a case.

Fixes: 1eb8c695bda9 ("net/mlx4_en: Add accelerated RFS support")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 35154635ec3a..8af7f2827322 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -372,6 +372,9 @@ mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
 	int nhoff = skb_network_offset(skb);
 	int ret = 0;
 
+	if (skb->encapsulation)
+		return -EPROTONOSUPPORT;
+
 	if (skb->protocol != htons(ETH_P_IP))
 		return -EPROTONOSUPPORT;
 
-- 
cgit v1.2.3


From 4d88c339c423eefe2fd48215016cb0c75fcb4c4d Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Date: Thu, 23 Sep 2021 03:16:05 -0700
Subject: atlantic: Fix issue in the pm resume flow.

After fixing hibernation resume flow, another use case was found which
should be explicitly handled - resume when device is in "down" state.
Invoke aq_nic_init jointly with aq_nic_start only if ndev was already
up during suspend/hibernate. We still need to perform nic_deinit() if
caller requests for it, to handle the freeze/resume scenarios.

Fixes: 57f780f1c433 ("atlantic: Fix driver resume flow.")
Signed-off-by: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index dee9ff74d6d6..d4b1976ee69b 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -413,13 +413,13 @@ static int atl_resume_common(struct device *dev, bool deep)
 	if (deep) {
 		/* Reinitialize Nic/Vecs objects */
 		aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
+	}
 
+	if (netif_running(nic->ndev)) {
 		ret = aq_nic_init(nic);
 		if (ret)
 			goto err_exit;
-	}
 
-	if (netif_running(nic->ndev)) {
 		ret = aq_nic_start(nic);
 		if (ret)
 			goto err_exit;
-- 
cgit v1.2.3