From addf89774e48c992316449ffab4f29c2309ebefb Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 9 Sep 2024 21:17:40 +0800 Subject: [PATCH 01/25] ieee802154: Fix build error If REGMAP_SPI is m and IEEE802154_MCR20A is y, mcr20a.c:(.text+0x3ed6c5b): undefined reference to `__devm_regmap_init_spi' ld: mcr20a.c:(.text+0x3ed6cb5): undefined reference to `__devm_regmap_init_spi' Select REGMAP_SPI for IEEE802154_MCR20A to fix it. Fixes: 8c6ad9cc5157 ("ieee802154: Add NXP MCR20A IEEE 802.15.4 transceiver driver") Signed-off-by: Jinjie Ruan Link: https://lore.kernel.org/20240909131740.1296608-1-ruanjinjie@huawei.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ieee802154/Kconfig b/drivers/net/ieee802154/Kconfig index 95da876c561384..1075e24b11defc 100644 --- a/drivers/net/ieee802154/Kconfig +++ b/drivers/net/ieee802154/Kconfig @@ -101,6 +101,7 @@ config IEEE802154_CA8210_DEBUGFS config IEEE802154_MCR20A tristate "MCR20A transceiver driver" + select REGMAP_SPI depends on IEEE802154_DRIVERS && MAC802154 depends on SPI help From bff1709b3980bd7f80be6786f64cc9a9ee9e56da Mon Sep 17 00:00:00 2001 From: Jiawei Ye Date: Tue, 24 Sep 2024 06:58:05 +0000 Subject: [PATCH 02/25] mac802154: Fix potential RCU dereference issue in mac802154_scan_worker In the `mac802154_scan_worker` function, the `scan_req->type` field was accessed after the RCU read-side critical section was unlocked. According to RCU usage rules, this is illegal and can lead to unpredictable behavior, such as accessing memory that has been updated or causing use-after-free issues. This possible bug was identified using a static analysis tool developed by myself, specifically designed to detect RCU-related issues. To address this, the `scan_req->type` value is now stored in a local variable `scan_req_type` while still within the RCU read-side critical section. The `scan_req_type` is then used after the RCU lock is released, ensuring that the type value is safely accessed without violating RCU rules. Fixes: e2c3e6f53a7a ("mac802154: Handle active scanning") Cc: stable@vger.kernel.org Signed-off-by: Jiawei Ye Acked-by: Miquel Raynal Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/tencent_3B2F4F2B4DA30FAE2F51A9634A16B3AD4908@qq.com Signed-off-by: Stefan Schmidt --- net/mac802154/scan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac802154/scan.c b/net/mac802154/scan.c index 1c0eeaa76560cd..a6dab3cc3ad858 100644 --- a/net/mac802154/scan.c +++ b/net/mac802154/scan.c @@ -176,6 +176,7 @@ void mac802154_scan_worker(struct work_struct *work) struct ieee802154_local *local = container_of(work, struct ieee802154_local, scan_work.work); struct cfg802154_scan_request *scan_req; + enum nl802154_scan_types scan_req_type; struct ieee802154_sub_if_data *sdata; unsigned int scan_duration = 0; struct wpan_phy *wpan_phy; @@ -209,6 +210,7 @@ void mac802154_scan_worker(struct work_struct *work) } wpan_phy = scan_req->wpan_phy; + scan_req_type = scan_req->type; scan_req_duration = scan_req->duration; /* Look for the next valid chan */ @@ -246,7 +248,7 @@ void mac802154_scan_worker(struct work_struct *work) goto end_scan; } - if (scan_req->type == NL802154_SCAN_ACTIVE) { + if (scan_req_type == NL802154_SCAN_ACTIVE) { ret = mac802154_transmit_beacon_req(local, sdata); if (ret) dev_err(&sdata->dev->dev, From 2bcae12c795f32ddfbf8c80d1b5f1d3286341c32 Mon Sep 17 00:00:00 2001 From: Gerd Bayer Date: Tue, 10 Sep 2024 10:53:51 +0200 Subject: [PATCH 03/25] net/mlx5: Fix error path in multi-packet WQE transmit Remove the erroneous unmap in case no DMA mapping was established The multi-packet WQE transmit code attempts to obtain a DMA mapping for the skb. This could fail, e.g. under memory pressure, when the IOMMU driver just can't allocate more memory for page tables. While the code tries to handle this in the path below the err_unmap label it erroneously unmaps one entry from the sq's FIFO list of active mappings. Since the current map attempt failed this unmap is removing some random DMA mapping that might still be required. If the PCI function now presents that IOVA, the IOMMU may assumes a rogue DMA access and e.g. on s390 puts the PCI function in error state. The erroneous behavior was seen in a stress-test environment that created memory pressure. Fixes: 5af75c747e2a ("net/mlx5e: Enhanced TX MPWQE for SKBs") Signed-off-by: Gerd Bayer Reviewed-by: Zhu Yanjun Acked-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index b09e9abd39f37f..f8c7912abe0e3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -642,7 +642,6 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, return; err_unmap: - mlx5e_dma_unmap_wqe_err(sq, 1); sq->stats->dropped++; dev_kfree_skb_any(skb); mlx5e_tx_flush(sq); From ec793155894140df7421d25903de2e6bc12c695b Mon Sep 17 00:00:00 2001 From: Mohamed Khalfella Date: Wed, 4 Sep 2024 22:02:48 -0600 Subject: [PATCH 04/25] net/mlx5: Added cond_resched() to crdump collection Collecting crdump involves reading vsc registers from pci config space of mlx device, which can take long time to complete. This might result in starving other threads waiting to run on the cpu. Numbers I got from testing ConnectX-5 Ex MCX516A-CDAT in the lab: - mlx5_vsc_gw_read_block_fast() was called with length = 1310716. - mlx5_vsc_gw_read_fast() reads 4 bytes at a time. It was not used to read the entire 1310716 bytes. It was called 53813 times because there are jumps in read_addr. - On average mlx5_vsc_gw_read_fast() took 35284.4ns. - In total mlx5_vsc_wait_on_flag() called vsc_read() 54707 times. The average time for each call was 17548.3ns. In some instances vsc_read() was called more than one time when the flag was not set. As expected the thread released the cpu after 16 iterations in mlx5_vsc_wait_on_flag(). - Total time to read crdump was 35284.4ns * 53813 ~= 1.898s. It was seen in the field that crdump can take more than 5 seconds to complete. During that time mlx5_vsc_wait_on_flag() did not release the cpu because it did not complete 16 iterations. It is believed that pci config reads were slow. Adding cond_resched() every 128 register read improves the situation. In the common case the, crdump takes ~1.8989s, the thread yields the cpu every ~4.51ms. If crdump takes ~5s, the thread yields the cpu every ~18.0ms. Fixes: 8b9d8baae1de ("net/mlx5: Add Crdump support") Reviewed-by: Yuanyuan Zhong Signed-off-by: Mohamed Khalfella Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c index d0b595ba611014..432c98f2626db9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c @@ -24,6 +24,11 @@ pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) #define VSC_MAX_RETRIES 2048 +/* Reading VSC registers can take relatively long time. + * Yield the cpu every 128 registers read. + */ +#define VSC_GW_READ_BLOCK_COUNT 128 + enum { VSC_CTRL_OFFSET = 0x4, VSC_COUNTER_OFFSET = 0x8, @@ -273,6 +278,7 @@ int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, { unsigned int next_read_addr = 0; unsigned int read_addr = 0; + unsigned int count = 0; while (read_addr < length) { if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr, @@ -280,6 +286,10 @@ int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, return read_addr; read_addr = next_read_addr; + if (++count == VSC_GW_READ_BLOCK_COUNT) { + cond_resched(); + count = 0; + } } return length; } From f25389e779500cf4a59ef9804534237841bce536 Mon Sep 17 00:00:00 2001 From: Elena Salomatkina Date: Tue, 24 Sep 2024 19:00:18 +0300 Subject: [PATCH 05/25] net/mlx5e: Fix NULL deref in mlx5e_tir_builder_alloc() In mlx5e_tir_builder_alloc() kvzalloc() may return NULL which is dereferenced on the next line in a reference to the modify field. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: a6696735d694 ("net/mlx5e: Convert TIR to a dedicated object") Signed-off-by: Elena Salomatkina Reviewed-by: Simon Horman Reviewed-by: Kalesh AP Reviewed-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c index d4239e3b3c88ef..11f724ad90dbfb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c @@ -23,6 +23,9 @@ struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify) struct mlx5e_tir_builder *builder; builder = kvzalloc(sizeof(*builder), GFP_KERNEL); + if (!builder) + return NULL; + builder->modify = modify; return builder; From 19da17010a55924f2b5540b0f61652cc5781af85 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 23 Sep 2024 11:44:30 +0300 Subject: [PATCH 06/25] net/mlx5: Fix wrong reserved field in hca_cap_2 in mlx5_ifc Fixing the wrong size of a field in hca_cap_2. The bug was introduced by adding new fields for HWS and not fixing the reserved field size. Fixes: 34c626c3004a ("net/mlx5: Added missing mlx5_ifc definition for HW Steering") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 620a5c305123bb..04df1610736ef5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2115,7 +2115,7 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 ts_cqe_metadata_size2wqe_counter[0x5]; u8 reserved_at_250[0x10]; - u8 reserved_at_260[0x120]; + u8 reserved_at_260[0x20]; u8 format_select_dw_gtpu_dw_0[0x8]; u8 format_select_dw_gtpu_dw_1[0x8]; From d8c561741ef83980114b3f7f95ffac54600f3f16 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 19 Sep 2024 12:17:59 +0300 Subject: [PATCH 07/25] net/mlx5: HWS, fixed double-free in error flow of creating SQ When SQ creation fails, call the appropriate mlx5_core destroy function. This fixes the following smatch warnings: divers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c:739 hws_send_ring_open_sq() warn: 'sq->dep_wqe' double freed hws_send_ring_open_sq() warn: 'sq->wq_ctrl.buf.frags' double freed hws_send_ring_open_sq() warn: 'sq->wr_priv' double freed Fixes: 2ca62599aa0b ("net/mlx5: HWS, added send engine and context handling") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/e4ebc227-4b25-49bf-9e4c-14b7ea5c6a07@stanley.mountain/ Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/hws/mlx5hws_send.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c index a1adbb48735c10..0c7989184c3071 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c @@ -653,6 +653,12 @@ static int hws_send_ring_create_sq(struct mlx5_core_dev *mdev, u32 pdn, return err; } +static void hws_send_ring_destroy_sq(struct mlx5_core_dev *mdev, + struct mlx5hws_send_ring_sq *sq) +{ + mlx5_core_destroy_sq(mdev, sq->sqn); +} + static int hws_send_ring_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn) { void *in, *sqc; @@ -696,7 +702,7 @@ static int hws_send_ring_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn, err = hws_send_ring_set_sq_rdy(mdev, sq->sqn); if (err) - hws_send_ring_close_sq(sq); + hws_send_ring_destroy_sq(mdev, sq); return err; } From d15525f300109fac5477dce1b8fef244c5dc9ec3 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 16 Sep 2024 14:13:39 +0300 Subject: [PATCH 08/25] net/mlx5: HWS, changed E2BIG error to a negative return code Fixed all the 'E2BIG' returns in error flow of functions to the negative '-E2BIG' as we are using negative error codes everywhere in HWS code. This also fixes the following smatch warnings: "warn: was negative '-E2BIG' intended?" Fixes: 74a778b4a63f ("net/mlx5: HWS, added definers handling") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/f8c77688-7d83-4937-baba-ac844dfe2e0b@stanley.mountain/ Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c | 2 +- .../mellanox/mlx5/core/steering/hws/mlx5hws_definer.c | 4 ++-- .../mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c index bb563f50ef09a2..601fad5fc54a39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c @@ -33,7 +33,7 @@ bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, * and let the usual match creation path handle it, * both for good and bad flows. */ - if (ret == E2BIG) { + if (ret == -E2BIG) { is_complex = true; mlx5hws_dbg(ctx, "Matcher definer layout: need complex matcher\n"); } else { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.c index 3bdb5c90efffa5..d566d2ddf42435 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.c @@ -1845,7 +1845,7 @@ hws_definer_find_best_match_fit(struct mlx5hws_context *ctx, return 0; } - return E2BIG; + return -E2BIG; } static void @@ -1931,7 +1931,7 @@ mlx5hws_definer_calc_layout(struct mlx5hws_context *ctx, /* Find the match definer layout for header layout match union */ ret = hws_definer_find_best_match_fit(ctx, match_definer, match_hl); if (ret) { - if (ret == E2BIG) + if (ret == -E2BIG) mlx5hws_dbg(ctx, "Failed to create match definer from header layout - E2BIG\n"); else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c index 33d2b31e4b4645..61a1155d4b4fdb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c @@ -675,7 +675,7 @@ static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher) if (!(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) { ret = mlx5hws_definer_mt_init(ctx, matcher->mt); if (ret) { - if (ret == E2BIG) + if (ret == -E2BIG) mlx5hws_err(ctx, "Failed to set matcher templates with match definers\n"); return ret; } From 023d2a43ed0d9ab73d4a35757121e4c8e01298e5 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 13 Aug 2024 13:34:54 +0300 Subject: [PATCH 09/25] net/mlx5e: SHAMPO, Fix overflow of hd_per_wq When having larger RQ sizes and small MTUs sizes, the hd_per_wq variable can overflow. Like in the following case: $> ethtool --set-ring eth1 rx 8192 $> ip link set dev eth1 mtu 144 $> ethtool --features eth1 rx-gro-hw on ... yields in dmesg: mlx5_core 0000:08:00.1: mlx5_cmd_out_err:808:(pid 194797): CREATE_MKEY(0x200) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x3bf6f), err(-22) because hd_per_wq is 64K which overflows to 0 and makes the command fail. This patch increases the variable size to 32 bit. Fixes: 99be56171fa9 ("net/mlx5e: SHAMPO, Re-enable HW-GRO") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index da0a1c65ec4a3b..57b7298a0e793c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -627,7 +627,7 @@ struct mlx5e_shampo_hd { struct mlx5e_dma_info *info; struct mlx5e_frag_page *pages; u16 curr_page_index; - u16 hd_per_wq; + u32 hd_per_wq; u16 hd_per_wqe; unsigned long *bitmap; u16 pi; From 7b124695db40d5c9c5295a94ae928a8d67a01c3d Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 2 Sep 2024 09:40:58 +0300 Subject: [PATCH 10/25] net/mlx5e: Fix crash caused by calling __xfrm_state_delete() twice The km.state is not checked in driver's delayed work. When xfrm_state_check_expire() is called, the state can be reset to XFRM_STATE_EXPIRED, even if it is XFRM_STATE_DEAD already. This happens when xfrm state is deleted, but not freed yet. As __xfrm_state_delete() is called again in xfrm timer, the following crash occurs. To fix this issue, skip xfrm_state_check_expire() if km.state is not XFRM_STATE_VALID. Oops: general protection fault, probably for non-canonical address 0xdead000000000108: 0000 [#1] SMP CPU: 5 UID: 0 PID: 7448 Comm: kworker/u102:2 Not tainted 6.11.0-rc2+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5e_ipsec: eth%d mlx5e_ipsec_handle_sw_limits [mlx5_core] RIP: 0010:__xfrm_state_delete+0x3d/0x1b0 Code: 0f 84 8b 01 00 00 48 89 fd c6 87 c8 00 00 00 05 48 8d bb 40 10 00 00 e8 11 04 1a 00 48 8b 95 b8 00 00 00 48 8b 85 c0 00 00 00 <48> 89 42 08 48 89 10 48 8b 55 10 48 b8 00 01 00 00 00 00 ad de 48 RSP: 0018:ffff88885f945ec8 EFLAGS: 00010246 RAX: dead000000000122 RBX: ffffffff82afa940 RCX: 0000000000000036 RDX: dead000000000100 RSI: 0000000000000000 RDI: ffffffff82afb980 RBP: ffff888109a20340 R08: ffff88885f945ea0 R09: 0000000000000000 R10: 0000000000000000 R11: ffff88885f945ff8 R12: 0000000000000246 R13: ffff888109a20340 R14: ffff88885f95f420 R15: ffff88885f95f400 FS: 0000000000000000(0000) GS:ffff88885f940000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2163102430 CR3: 00000001128d6001 CR4: 0000000000370eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? die_addr+0x33/0x90 ? exc_general_protection+0x1a2/0x390 ? asm_exc_general_protection+0x22/0x30 ? __xfrm_state_delete+0x3d/0x1b0 ? __xfrm_state_delete+0x2f/0x1b0 xfrm_timer_handler+0x174/0x350 ? __xfrm_state_delete+0x1b0/0x1b0 __hrtimer_run_queues+0x121/0x270 hrtimer_run_softirq+0x88/0xd0 handle_softirqs+0xcc/0x270 do_softirq+0x3c/0x50 __local_bh_enable_ip+0x47/0x50 mlx5e_ipsec_handle_sw_limits+0x7d/0x90 [mlx5_core] process_one_work+0x137/0x2d0 worker_thread+0x28d/0x3a0 ? rescuer_thread+0x480/0x480 kthread+0xb8/0xe0 ? kthread_park+0x80/0x80 ret_from_fork+0x2d/0x50 ? kthread_park+0x80/0x80 ret_from_fork_asm+0x11/0x20 Fixes: b2f7b01d36a9 ("net/mlx5e: Simulate missing IPsec TX limits hardware functionality") Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 3d274599015be1..ca92e518be7669 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -67,7 +67,6 @@ static void mlx5e_ipsec_handle_sw_limits(struct work_struct *_work) return; spin_lock_bh(&x->lock); - xfrm_state_check_expire(x); if (x->km.state == XFRM_STATE_EXPIRED) { sa_entry->attrs.drop = true; spin_unlock_bh(&x->lock); @@ -75,6 +74,13 @@ static void mlx5e_ipsec_handle_sw_limits(struct work_struct *_work) mlx5e_accel_ipsec_fs_modify(sa_entry); return; } + + if (x->km.state != XFRM_STATE_VALID) { + spin_unlock_bh(&x->lock); + return; + } + + xfrm_state_check_expire(x); spin_unlock_bh(&x->lock); queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork, From 09573b1cc76e7ff8f056ab29ea1cdc152ec8c653 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 11 Sep 2024 17:42:34 +0800 Subject: [PATCH 11/25] net: ieee802154: mcr20a: Use IRQF_NO_AUTOEN flag in request_irq() disable_irq() after request_irq() still has a time gap in which interrupts can come. request_irq() with IRQF_NO_AUTOEN flag will disable IRQ auto-enable when request IRQ. Fixes: 8c6ad9cc5157 ("ieee802154: Add NXP MCR20A IEEE 802.15.4 transceiver driver") Reviewed-by: Miquel Raynal Signed-off-by: Jinjie Ruan Link: https://lore.kernel.org/20240911094234.1922418-1-ruanjinjie@huawei.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mcr20a.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 433fb583920310..020d392a98b69d 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -1302,16 +1302,13 @@ mcr20a_probe(struct spi_device *spi) irq_type = IRQF_TRIGGER_FALLING; ret = devm_request_irq(&spi->dev, spi->irq, mcr20a_irq_isr, - irq_type, dev_name(&spi->dev), lp); + irq_type | IRQF_NO_AUTOEN, dev_name(&spi->dev), lp); if (ret) { dev_err(&spi->dev, "could not request_irq for mcr20a\n"); ret = -ENODEV; goto free_dev; } - /* disable_irq by default and wait for starting hardware */ - disable_irq(spi->irq); - ret = ieee802154_register_hw(hw); if (ret) { dev_crit(&spi->dev, "ieee802154_register_hw failed\n"); From f53e1c9c726d83092167f2226f32bd3b73f26c21 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 12 Sep 2024 12:34:42 -0400 Subject: [PATCH 12/25] Bluetooth: MGMT: Fix possible crash on mgmt_index_removed If mgmt_index_removed is called while there are commands queued on cmd_sync it could lead to crashes like the bellow trace: 0x0000053D: __list_del_entry_valid_or_report+0x98/0xdc 0x0000053D: mgmt_pending_remove+0x18/0x58 [bluetooth] 0x0000053E: mgmt_remove_adv_monitor_complete+0x80/0x108 [bluetooth] 0x0000053E: hci_cmd_sync_work+0xbc/0x164 [bluetooth] So while handling mgmt_index_removed this attempts to dequeue commands passed as user_data to cmd_sync. Fixes: 7cf5c2978f23 ("Bluetooth: hci_sync: Refactor remove Adv Monitor") Reported-by: jiaymao Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e4f564d6f6fbfb..4157d9f23f46ef 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1453,10 +1453,15 @@ static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data) static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) { - if (cmd->cmd_complete) { - u8 *status = data; + struct cmd_lookup *match = data; + + /* dequeue cmd_sync entries using cmd as data as that is about to be + * removed/freed. + */ + hci_cmd_sync_dequeue(match->hdev, NULL, cmd, NULL); - cmd->cmd_complete(cmd, *status); + if (cmd->cmd_complete) { + cmd->cmd_complete(cmd, match->mgmt_status); mgmt_pending_remove(cmd); return; @@ -9394,12 +9399,12 @@ void mgmt_index_added(struct hci_dev *hdev) void mgmt_index_removed(struct hci_dev *hdev) { struct mgmt_ev_ext_index ev; - u8 status = MGMT_STATUS_INVALID_INDEX; + struct cmd_lookup match = { NULL, hdev, MGMT_STATUS_INVALID_INDEX }; if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); + mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, @@ -9450,7 +9455,7 @@ void mgmt_power_on(struct hci_dev *hdev, int err) void __mgmt_power_off(struct hci_dev *hdev) { struct cmd_lookup match = { NULL, hdev }; - u8 status, zero_cod[] = { 0, 0, 0 }; + u8 zero_cod[] = { 0, 0, 0 }; mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); @@ -9462,11 +9467,11 @@ void __mgmt_power_off(struct hci_dev *hdev) * status responses. */ if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) - status = MGMT_STATUS_INVALID_INDEX; + match.mgmt_status = MGMT_STATUS_INVALID_INDEX; else - status = MGMT_STATUS_NOT_POWERED; + match.mgmt_status = MGMT_STATUS_NOT_POWERED; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); + mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, From 333b4fd11e89b29c84c269123f871883a30be586 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 23 Sep 2024 12:47:39 -0400 Subject: [PATCH 13/25] Bluetooth: L2CAP: Fix uaf in l2cap_connect [Syzbot reported] BUG: KASAN: slab-use-after-free in l2cap_connect.constprop.0+0x10d8/0x1270 net/bluetooth/l2cap_core.c:3949 Read of size 8 at addr ffff8880241e9800 by task kworker/u9:0/54 CPU: 0 UID: 0 PID: 54 Comm: kworker/u9:0 Not tainted 6.11.0-rc6-syzkaller-00268-g788220eee30d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 Workqueue: hci2 hci_rx_work Call Trace: __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:119 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc3/0x620 mm/kasan/report.c:488 kasan_report+0xd9/0x110 mm/kasan/report.c:601 l2cap_connect.constprop.0+0x10d8/0x1270 net/bluetooth/l2cap_core.c:3949 l2cap_connect_req net/bluetooth/l2cap_core.c:4080 [inline] l2cap_bredr_sig_cmd net/bluetooth/l2cap_core.c:4772 [inline] l2cap_sig_channel net/bluetooth/l2cap_core.c:5543 [inline] l2cap_recv_frame+0xf0b/0x8eb0 net/bluetooth/l2cap_core.c:6825 l2cap_recv_acldata+0x9b4/0xb70 net/bluetooth/l2cap_core.c:7514 hci_acldata_packet net/bluetooth/hci_core.c:3791 [inline] hci_rx_work+0xaab/0x1610 net/bluetooth/hci_core.c:4028 process_one_work+0x9c5/0x1b40 kernel/workqueue.c:3231 process_scheduled_works kernel/workqueue.c:3312 [inline] worker_thread+0x6c8/0xed0 kernel/workqueue.c:3389 kthread+0x2c1/0x3a0 kernel/kthread.c:389 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 ... Freed by task 5245: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3b/0x60 mm/kasan/generic.c:579 poison_slab_object+0xf7/0x160 mm/kasan/common.c:240 __kasan_slab_free+0x32/0x50 mm/kasan/common.c:256 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2256 [inline] slab_free mm/slub.c:4477 [inline] kfree+0x12a/0x3b0 mm/slub.c:4598 l2cap_conn_free net/bluetooth/l2cap_core.c:1810 [inline] kref_put include/linux/kref.h:65 [inline] l2cap_conn_put net/bluetooth/l2cap_core.c:1822 [inline] l2cap_conn_del+0x59d/0x730 net/bluetooth/l2cap_core.c:1802 l2cap_connect_cfm+0x9e6/0xf80 net/bluetooth/l2cap_core.c:7241 hci_connect_cfm include/net/bluetooth/hci_core.h:1960 [inline] hci_conn_failed+0x1c3/0x370 net/bluetooth/hci_conn.c:1265 hci_abort_conn_sync+0x75a/0xb50 net/bluetooth/hci_sync.c:5583 abort_conn_sync+0x197/0x360 net/bluetooth/hci_conn.c:2917 hci_cmd_sync_work+0x1a4/0x410 net/bluetooth/hci_sync.c:328 process_one_work+0x9c5/0x1b40 kernel/workqueue.c:3231 process_scheduled_works kernel/workqueue.c:3312 [inline] worker_thread+0x6c8/0xed0 kernel/workqueue.c:3389 kthread+0x2c1/0x3a0 kernel/kthread.c:389 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Reported-by: syzbot+c12e2f941af1feb5632c@syzkaller.appspotmail.com Tested-by: syzbot+c12e2f941af1feb5632c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c12e2f941af1feb5632c Fixes: 7b064edae38d ("Bluetooth: Fix authentication if acl data comes before remote feature evt") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 2 ++ net/bluetooth/hci_event.c | 2 +- net/bluetooth/l2cap_core.c | 8 -------- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d6976db02c06c7..b2f8f9c5b61066 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3782,6 +3782,8 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, handle); + if (conn && hci_dev_test_flag(hdev, HCI_MGMT)) + mgmt_device_connected(hdev, conn, NULL, 0); hci_dev_unlock(hdev); if (conn) { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1c82dcdf6e8fc7..b87c0f1dab9e35 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3706,7 +3706,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev, void *data, goto unlock; } - if (!ev->status && !test_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) { + if (!ev->status) { struct hci_cp_remote_name_req cp; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, &conn->dst); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9988ba382b686a..6544c1ed714344 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4066,17 +4066,9 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, static int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { - struct hci_dev *hdev = conn->hcon->hdev; - struct hci_conn *hcon = conn->hcon; - if (cmd_len < sizeof(struct l2cap_conn_req)) return -EPROTO; - hci_dev_lock(hdev); - if (hci_dev_test_flag(hdev, HCI_MGMT)) - mgmt_device_connected(hdev, hcon, NULL, 0); - hci_dev_unlock(hdev); - l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP); return 0; } From 7b1ab460592ca818e7b52f27cd3ec86af79220d1 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Thu, 12 Sep 2024 11:12:04 +0800 Subject: [PATCH 14/25] Bluetooth: btmrvl: Use IRQF_NO_AUTOEN flag in request_irq() disable_irq() after request_irq() still has a time gap in which interrupts can come. request_irq() with IRQF_NO_AUTOEN flag will disable IRQ auto-enable when request IRQ. Fixes: bb7f4f0bcee6 ("btmrvl: add platform specific wakeup interrupt support") Signed-off-by: Jinjie Ruan Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmrvl_sdio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c index 85b7f2bb425982..07cd308f7abf6d 100644 --- a/drivers/bluetooth/btmrvl_sdio.c +++ b/drivers/bluetooth/btmrvl_sdio.c @@ -92,7 +92,7 @@ static int btmrvl_sdio_probe_of(struct device *dev, } else { ret = devm_request_irq(dev, cfg->irq_bt, btmrvl_wake_irq_bt, - 0, "bt_wake", card); + IRQF_NO_AUTOEN, "bt_wake", card); if (ret) { dev_err(dev, "Failed to request irq_bt %d (%d)\n", @@ -101,7 +101,6 @@ static int btmrvl_sdio_probe_of(struct device *dev, /* Configure wakeup (enabled by default) */ device_init_wakeup(dev, true); - disable_irq(cfg->irq_bt); } } From b25e11f978b63cb7857890edb3a698599cddb10e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 12 Sep 2024 12:17:00 -0400 Subject: [PATCH 15/25] Bluetooth: hci_event: Align BR/EDR JUST_WORKS paring with LE This aligned BR/EDR JUST_WORKS method with LE which since 92516cd97fd4 ("Bluetooth: Always request for user confirmation for Just Works") always request user confirmation with confirm_hint set since the likes of bluetoothd have dedicated policy around JUST_WORKS method (e.g. main.conf:JustWorksRepairing). CVE: CVE-2024-8805 Cc: stable@vger.kernel.org Fixes: ba15a58b179e ("Bluetooth: Fix SSP acceptor just-works confirmation without MITM") Signed-off-by: Luiz Augusto von Dentz Tested-by: Kiran K --- net/bluetooth/hci_event.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b87c0f1dab9e35..561c8cb87473ef 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5324,19 +5324,16 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, void *data, goto unlock; } - /* If no side requires MITM protection; auto-accept */ + /* If no side requires MITM protection; use JUST_CFM method */ if ((!loc_mitm || conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) && (!rem_mitm || conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)) { - /* If we're not the initiators request authorization to - * proceed from user space (mgmt_user_confirm with - * confirm_hint set to 1). The exception is if neither - * side had MITM or if the local IO capability is - * NoInputNoOutput, in which case we do auto-accept + /* If we're not the initiator of request authorization and the + * local IO capability is not NoInputNoOutput, use JUST_WORKS + * method (mgmt_user_confirm with confirm_hint set to 1). */ if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && - conn->io_capability != HCI_IO_NO_INPUT_OUTPUT && - (loc_mitm || rem_mitm)) { + conn->io_capability != HCI_IO_NO_INPUT_OUTPUT) { bt_dev_dbg(hdev, "Confirming auto-accept as acceptor"); confirm_hint = 1; goto confirm; From 17bd3bd82f9f79f3feba15476c2b2c95a9b11ff8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 26 Sep 2024 10:53:14 +0200 Subject: [PATCH 16/25] net: gso: fix tcp fraglist segmentation after pull from frag_list Detect tcp gso fraglist skbs with corrupted geometry (see below) and pass these to skb_segment instead of skb_segment_list, as the first can segment them correctly. Valid SKB_GSO_FRAGLIST skbs - consist of two or more segments - the head_skb holds the protocol headers plus first gso_size - one or more frag_list skbs hold exactly one segment - all but the last must be gso_size Optional datapath hooks such as NAT and BPF (bpf_skb_pull_data) can modify these skbs, breaking these invariants. In extreme cases they pull all data into skb linear. For TCP, this causes a NULL ptr deref in __tcpv4_gso_segment_list_csum at tcp_hdr(seg->next). Detect invalid geometry due to pull, by checking head_skb size. Don't just drop, as this may blackhole a destination. Convert to be able to pass to regular skb_segment. Approach and description based on a patch by Willem de Bruijn. Link: https://lore.kernel.org/netdev/20240428142913.18666-1-shiming.cheng@mediatek.com/ Link: https://lore.kernel.org/netdev/20240922150450.3873767-1-willemdebruijn.kernel@gmail.com/ Fixes: bee88cd5bd83 ("net: add support for segmenting TCP fraglist GSO packets") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20240926085315.51524-1-nbd@nbd.name Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_offload.c | 10 ++++++++-- net/ipv6/tcpv6_offload.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index e4ad3311e14895..2308665b51c538 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -101,8 +101,14 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); - if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) - return __tcp4_gso_segment_list(skb, features); + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) { + struct tcphdr *th = tcp_hdr(skb); + + if (skb_pagelen(skb) - th->doff * 4 == skb_shinfo(skb)->gso_size) + return __tcp4_gso_segment_list(skb, features); + + skb->ip_summed = CHECKSUM_NONE; + } if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct iphdr *iph = ip_hdr(skb); diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 23971903e66de8..a45bf17cb2a172 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -159,8 +159,14 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); - if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) - return __tcp6_gso_segment_list(skb, features); + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) { + struct tcphdr *th = tcp_hdr(skb); + + if (skb_pagelen(skb) - th->doff * 4 == skb_shinfo(skb)->gso_size) + return __tcp6_gso_segment_list(skb, features); + + skb->ip_summed = CHECKSUM_NONE; + } if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct ipv6hdr *ipv6h = ipv6_hdr(skb); From 49d14b54a527289d09a9480f214b8c586322310a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Sep 2024 16:58:36 +0000 Subject: [PATCH 17/25] net: test for not too small csum_start in virtio_net_hdr_to_skb() syzbot was able to trigger this warning [1], after injecting a malicious packet through af_packet, setting skb->csum_start and thus the transport header to an incorrect value. We can at least make sure the transport header is after the end of the network header (with a estimated minimal size). [1] [ 67.873027] skb len=4096 headroom=16 headlen=14 tailroom=0 mac=(-1,-1) mac_len=0 net=(16,-6) trans=10 shinfo(txflags=0 nr_frags=1 gso(size=0 type=0 segs=0)) csum(0xa start=10 offset=0 ip_summed=3 complete_sw=0 valid=0 level=0) hash(0x0 sw=0 l4=0) proto=0x0800 pkttype=0 iif=0 priority=0x0 mark=0x0 alloc_cpu=10 vlan_all=0x0 encapsulation=0 inner(proto=0x0000, mac=0, net=0, trans=0) [ 67.877172] dev name=veth0_vlan feat=0x000061164fdd09e9 [ 67.877764] sk family=17 type=3 proto=0 [ 67.878279] skb linear: 00000000: 00 00 10 00 00 00 00 00 0f 00 00 00 08 00 [ 67.879128] skb frag: 00000000: 0e 00 07 00 00 00 28 00 08 80 1c 00 04 00 00 02 [ 67.879877] skb frag: 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 67.880647] skb frag: 00000020: 00 00 02 00 00 00 08 00 1b 00 00 00 00 00 00 00 [ 67.881156] skb frag: 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 67.881753] skb frag: 00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 67.882173] skb frag: 00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 67.882790] skb frag: 00000060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 67.883171] skb frag: 00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 67.883733] skb frag: 00000080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 67.884206] skb frag: 00000090: 00 00 00 00 00 00 00 00 00 00 69 70 76 6c 61 6e [ 67.884704] skb frag: 000000a0: 31 00 00 00 00 00 00 00 00 00 2b 00 00 00 00 00 [ 67.885139] skb frag: 000000b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 67.885677] skb frag: 000000c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 67.886042] skb frag: 000000d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 67.886408] skb frag: 000000e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 67.887020] skb frag: 000000f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 67.887384] skb frag: 00000100: 00 00 [ 67.887878] ------------[ cut here ]------------ [ 67.887908] offset (-6) >= skb_headlen() (14) [ 67.888445] WARNING: CPU: 10 PID: 2088 at net/core/dev.c:3332 skb_checksum_help (net/core/dev.c:3332 (discriminator 2)) [ 67.889353] Modules linked in: macsec macvtap macvlan hsr wireguard curve25519_x86_64 libcurve25519_generic libchacha20poly1305 chacha_x86_64 libchacha poly1305_x86_64 dummy bridge sr_mod cdrom evdev pcspkr i2c_piix4 9pnet_virtio 9p 9pnet netfs [ 67.890111] CPU: 10 UID: 0 PID: 2088 Comm: b363492833 Not tainted 6.11.0-virtme #1011 [ 67.890183] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 67.890309] RIP: 0010:skb_checksum_help (net/core/dev.c:3332 (discriminator 2)) [ 67.891043] Call Trace: [ 67.891173] [ 67.891274] ? __warn (kernel/panic.c:741) [ 67.891320] ? skb_checksum_help (net/core/dev.c:3332 (discriminator 2)) [ 67.891333] ? report_bug (lib/bug.c:180 lib/bug.c:219) [ 67.891348] ? handle_bug (arch/x86/kernel/traps.c:239) [ 67.891363] ? exc_invalid_op (arch/x86/kernel/traps.c:260 (discriminator 1)) [ 67.891372] ? asm_exc_invalid_op (./arch/x86/include/asm/idtentry.h:621) [ 67.891388] ? skb_checksum_help (net/core/dev.c:3332 (discriminator 2)) [ 67.891399] ? skb_checksum_help (net/core/dev.c:3332 (discriminator 2)) [ 67.891416] ip_do_fragment (net/ipv4/ip_output.c:777 (discriminator 1)) [ 67.891448] ? __ip_local_out (./include/linux/skbuff.h:1146 ./include/net/l3mdev.h:196 ./include/net/l3mdev.h:213 net/ipv4/ip_output.c:113) [ 67.891459] ? __pfx_ip_finish_output2 (net/ipv4/ip_output.c:200) [ 67.891470] ? ip_route_output_flow (./arch/x86/include/asm/preempt.h:84 (discriminator 13) ./include/linux/rcupdate.h:96 (discriminator 13) ./include/linux/rcupdate.h:871 (discriminator 13) net/ipv4/route.c:2625 (discriminator 13) ./include/net/route.h:141 (discriminator 13) net/ipv4/route.c:2852 (discriminator 13)) [ 67.891484] ipvlan_process_v4_outbound (drivers/net/ipvlan/ipvlan_core.c:445 (discriminator 1)) [ 67.891581] ipvlan_queue_xmit (drivers/net/ipvlan/ipvlan_core.c:542 drivers/net/ipvlan/ipvlan_core.c:604 drivers/net/ipvlan/ipvlan_core.c:670) [ 67.891596] ipvlan_start_xmit (drivers/net/ipvlan/ipvlan_main.c:227) [ 67.891607] dev_hard_start_xmit (./include/linux/netdevice.h:4916 ./include/linux/netdevice.h:4925 net/core/dev.c:3588 net/core/dev.c:3604) [ 67.891620] __dev_queue_xmit (net/core/dev.h:168 (discriminator 25) net/core/dev.c:4425 (discriminator 25)) [ 67.891630] ? skb_copy_bits (./include/linux/uaccess.h:233 (discriminator 1) ./include/linux/uaccess.h:260 (discriminator 1) ./include/linux/highmem-internal.h:230 (discriminator 1) net/core/skbuff.c:3018 (discriminator 1)) [ 67.891645] ? __pskb_pull_tail (net/core/skbuff.c:2848 (discriminator 4)) [ 67.891655] ? skb_partial_csum_set (net/core/skbuff.c:5657) [ 67.891666] ? virtio_net_hdr_to_skb.constprop.0 (./include/linux/skbuff.h:2791 (discriminator 3) ./include/linux/skbuff.h:2799 (discriminator 3) ./include/linux/virtio_net.h:109 (discriminator 3)) [ 67.891684] packet_sendmsg (net/packet/af_packet.c:3145 (discriminator 1) net/packet/af_packet.c:3177 (discriminator 1)) [ 67.891700] ? _raw_spin_lock_bh (./arch/x86/include/asm/atomic.h:107 (discriminator 4) ./include/linux/atomic/atomic-arch-fallback.h:2170 (discriminator 4) ./include/linux/atomic/atomic-instrumented.h:1302 (discriminator 4) ./include/asm-generic/qspinlock.h:111 (discriminator 4) ./include/linux/spinlock.h:187 (discriminator 4) ./include/linux/spinlock_api_smp.h:127 (discriminator 4) kernel/locking/spinlock.c:178 (discriminator 4)) [ 67.891716] __sys_sendto (net/socket.c:730 (discriminator 1) net/socket.c:745 (discriminator 1) net/socket.c:2210 (discriminator 1)) [ 67.891734] ? do_sock_setsockopt (net/socket.c:2335) [ 67.891747] ? __sys_setsockopt (./include/linux/file.h:34 net/socket.c:2355) [ 67.891761] __x64_sys_sendto (net/socket.c:2222 (discriminator 1) net/socket.c:2218 (discriminator 1) net/socket.c:2218 (discriminator 1)) [ 67.891772] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) [ 67.891785] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Fixes: 9181d6f8a2bb ("net: add more sanity check in virtio_net_hdr_to_skb()") Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20240926165836.3797406-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 276ca543ef44d8..02a9f4dc594d02 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -103,8 +103,10 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; + if (skb_transport_offset(skb) < nh_min_len) + return -EINVAL; - nh_min_len = max_t(u32, nh_min_len, skb_transport_offset(skb)); + nh_min_len = skb_transport_offset(skb); p_off = nh_min_len + thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; From 8ed7cf66f4841bcc8c15a89be0732b933703b51c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 27 Sep 2024 12:13:49 +0800 Subject: [PATCH 18/25] selftests: rds: move include.sh to TEST_FILES The include.sh file is generated for inclusion and should not be executable. Otherwise, it will be added to kselftest-list.txt. Additionally, add the executable bit for test.py at the same time to ensure proper functionality. Fixes: 3ade6ce1255e ("selftests: rds: add testing infrastructure") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20240927041349.81216-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rds/Makefile | 3 ++- tools/testing/selftests/net/rds/test.py | 0 2 files changed, 2 insertions(+), 1 deletion(-) mode change 100644 => 100755 tools/testing/selftests/net/rds/test.py diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile index da9714bc7aad32..cf30307a829b23 100644 --- a/tools/testing/selftests/net/rds/Makefile +++ b/tools/testing/selftests/net/rds/Makefile @@ -4,9 +4,10 @@ all: @echo mk_build_dir="$(shell pwd)" > include.sh TEST_PROGS := run.sh \ - include.sh \ test.py +TEST_FILES := include.sh + EXTRA_CLEAN := /tmp/rds_logs include ../../lib.mk diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py old mode 100644 new mode 100755 From aec7291003df78cb71fd461d7b672912bde55807 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Sep 2024 07:45:53 +0000 Subject: [PATCH 19/25] ppp: do not assume bh is held in ppp_channel_bridge_input() Networking receive path is usually handled from BH handler. However, some protocols need to acquire the socket lock, and packets might be stored in the socket backlog is the socket was owned by a user process. In this case, release_sock(), __release_sock(), and sk_backlog_rcv() might call the sk->sk_backlog_rcv() handler in process context. sybot caught ppp was not considering this case in ppp_channel_bridge_input() : WARNING: inconsistent lock state 6.11.0-rc7-syzkaller-g5f5673607153 #0 Not tainted -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. ksoftirqd/1/24 [HC0[0]:SC1[1]:HE1:SE0] takes: ffff0000db7f11e0 (&pch->downl){+.?.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff0000db7f11e0 (&pch->downl){+.?.}-{2:2}, at: ppp_channel_bridge_input drivers/net/ppp/ppp_generic.c:2272 [inline] ffff0000db7f11e0 (&pch->downl){+.?.}-{2:2}, at: ppp_input+0x16c/0x854 drivers/net/ppp/ppp_generic.c:2304 {SOFTIRQ-ON-W} state was registered at: lock_acquire+0x240/0x728 kernel/locking/lockdep.c:5759 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x48/0x60 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] ppp_channel_bridge_input drivers/net/ppp/ppp_generic.c:2272 [inline] ppp_input+0x16c/0x854 drivers/net/ppp/ppp_generic.c:2304 pppoe_rcv_core+0xfc/0x314 drivers/net/ppp/pppoe.c:379 sk_backlog_rcv include/net/sock.h:1111 [inline] __release_sock+0x1a8/0x3d8 net/core/sock.c:3004 release_sock+0x68/0x1b8 net/core/sock.c:3558 pppoe_sendmsg+0xc8/0x5d8 drivers/net/ppp/pppoe.c:903 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x374/0x4f4 net/socket.c:2204 __do_sys_sendto net/socket.c:2216 [inline] __se_sys_sendto net/socket.c:2212 [inline] __arm64_sys_sendto+0xd8/0xf8 net/socket.c:2212 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:712 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 irq event stamp: 282914 hardirqs last enabled at (282914): [] __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:151 [inline] hardirqs last enabled at (282914): [] _raw_spin_unlock_irqrestore+0x38/0x98 kernel/locking/spinlock.c:194 hardirqs last disabled at (282913): [] __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:108 [inline] hardirqs last disabled at (282913): [] _raw_spin_lock_irqsave+0x2c/0x7c kernel/locking/spinlock.c:162 softirqs last enabled at (282904): [] softirq_handle_end kernel/softirq.c:400 [inline] softirqs last enabled at (282904): [] handle_softirqs+0xa3c/0xbfc kernel/softirq.c:582 softirqs last disabled at (282909): [] run_ksoftirqd+0x70/0x158 kernel/softirq.c:928 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&pch->downl); lock(&pch->downl); *** DEADLOCK *** 1 lock held by ksoftirqd/1/24: #0: ffff80008f74dfa0 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire+0x10/0x4c include/linux/rcupdate.h:325 stack backtrace: CPU: 1 UID: 0 PID: 24 Comm: ksoftirqd/1 Not tainted 6.11.0-rc7-syzkaller-g5f5673607153 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 Call trace: dump_backtrace+0x1b8/0x1e4 arch/arm64/kernel/stacktrace.c:319 show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:326 __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0xe4/0x150 lib/dump_stack.c:119 dump_stack+0x1c/0x28 lib/dump_stack.c:128 print_usage_bug+0x698/0x9ac kernel/locking/lockdep.c:4000 mark_lock_irq+0x980/0xd2c mark_lock+0x258/0x360 kernel/locking/lockdep.c:4677 __lock_acquire+0xf48/0x779c kernel/locking/lockdep.c:5096 lock_acquire+0x240/0x728 kernel/locking/lockdep.c:5759 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x48/0x60 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] ppp_channel_bridge_input drivers/net/ppp/ppp_generic.c:2272 [inline] ppp_input+0x16c/0x854 drivers/net/ppp/ppp_generic.c:2304 ppp_async_process+0x98/0x150 drivers/net/ppp/ppp_async.c:495 tasklet_action_common+0x318/0x3f4 kernel/softirq.c:785 tasklet_action+0x68/0x8c kernel/softirq.c:811 handle_softirqs+0x2e4/0xbfc kernel/softirq.c:554 run_ksoftirqd+0x70/0x158 kernel/softirq.c:928 smpboot_thread_fn+0x4b0/0x90c kernel/smpboot.c:164 kthread+0x288/0x310 kernel/kthread.c:389 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:860 Fixes: 4cf476ced45d ("ppp: add PPPIOCBRIDGECHAN and PPPIOCUNBRIDGECHAN ioctls") Reported-by: syzbot+bd8d55ee2acd0a71d8ce@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/66f661e2.050a0220.38ace9.000f.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Tom Parkin Cc: James Chapman Link: https://patch.msgid.link/20240927074553.341910-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_generic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 4b2971e2bf484a..0d5cd705decb1f 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2269,7 +2269,7 @@ static bool ppp_channel_bridge_input(struct channel *pch, struct sk_buff *skb) if (!pchb) goto out_rcu; - spin_lock(&pchb->downl); + spin_lock_bh(&pchb->downl); if (!pchb->chan) { /* channel got unregistered */ kfree_skb(skb); @@ -2281,7 +2281,7 @@ static bool ppp_channel_bridge_input(struct channel *pch, struct sk_buff *skb) kfree_skb(skb); outl: - spin_unlock(&pchb->downl); + spin_unlock_bh(&pchb->downl); out_rcu: rcu_read_unlock(); From c283782fc5d60c4d8169137c6f955aa3553d3b3d Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 27 Sep 2024 19:46:10 +0800 Subject: [PATCH 20/25] net: phy: realtek: Check the index value in led_hw_control_get Just like rtl8211f_led_hw_is_supported() and rtl8211f_led_hw_control_set(), the rtl8211f_led_hw_control_get() also needs to check the index value, otherwise the caller is likely to get an incorrect rules. Fixes: 17784801d888 ("net: phy: realtek: Add support for PHY LEDs on RTL8211F") Signed-off-by: Hui Wang Reviewed-by: Marek Vasut Link: https://patch.msgid.link/20240927114610.1278935-1-hui.wang@canonical.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 25e5bfbb6f89b8..c15d2f66ef0dc2 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -527,6 +527,9 @@ static int rtl8211f_led_hw_control_get(struct phy_device *phydev, u8 index, { int val; + if (index >= RTL8211F_LED_COUNT) + return -EINVAL; + val = phy_read_paged(phydev, 0xd04, RTL8211F_LEDCR); if (val < 0) return val; From 3c97fe4f9fbc2bbc555b51268a9556e61cd3ca4e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 28 Sep 2024 13:04:01 +0300 Subject: [PATCH 21/25] net: ethernet: ti: am65-cpsw: Fix forever loop in cleanup code This error handling has a typo. It should i++ instead of i--. In the original code the error handling will loop until it crashes. Fixes: da70d184a8c3 ("net: ethernet: ti: am65-cpsw: Introduce multi queue Rx") Signed-off-by: Dan Carpenter Reviewed-by: Alexander Sverdlin Reviewed-by: Roger Quadros Link: https://patch.msgid.link/8e7960cc-415d-48d7-99ce-f623022ec7b5@stanley.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index cbe99017cbfa47..d253727b160f5f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -763,7 +763,7 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) k3_udma_glue_disable_rx_chn(rx_chn->rx_chn); fail_rx: - for (i = 0; i < common->rx_ch_num_flows; i--) + for (i = 0; i < common->rx_ch_num_flows; i++) k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, &rx_chn->flows[i], am65_cpsw_nuss_rx_cleanup, 0); From b04c4d9eb4f25b950b33218e33b04c94e7445e51 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 29 Sep 2024 02:18:20 -0400 Subject: [PATCH 22/25] vrf: revert "vrf: Remove unnecessary RCU-bh critical section" This reverts commit 504fc6f4f7f681d2a03aa5f68aad549d90eab853. dev_queue_xmit_nit is expected to be called with BH disabled. __dev_queue_xmit has the following: /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ rcu_read_lock_bh(); VRF must follow this invariant. The referenced commit removed this protection. Which triggered a lockdep warning: ================================ WARNING: inconsistent lock state 6.11.0 #1 Tainted: G W -------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. btserver/134819 [HC0[0]:SC0[0]:HE1:SE1] takes: ffff8882da30c118 (rlock-AF_PACKET){+.?.}-{2:2}, at: tpacket_rcv+0x863/0x3b30 {IN-SOFTIRQ-W} state was registered at: lock_acquire+0x19a/0x4f0 _raw_spin_lock+0x27/0x40 packet_rcv+0xa33/0x1320 __netif_receive_skb_core.constprop.0+0xcb0/0x3a90 __netif_receive_skb_list_core+0x2c9/0x890 netif_receive_skb_list_internal+0x610/0xcc0 [...] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(rlock-AF_PACKET); lock(rlock-AF_PACKET); *** DEADLOCK *** Call Trace: dump_stack_lvl+0x73/0xa0 mark_lock+0x102e/0x16b0 __lock_acquire+0x9ae/0x6170 lock_acquire+0x19a/0x4f0 _raw_spin_lock+0x27/0x40 tpacket_rcv+0x863/0x3b30 dev_queue_xmit_nit+0x709/0xa40 vrf_finish_direct+0x26e/0x340 [vrf] vrf_l3_out+0x5f4/0xe80 [vrf] __ip_local_out+0x51e/0x7a0 [...] Fixes: 504fc6f4f7f6 ("vrf: Remove unnecessary RCU-bh critical section") Link: https://lore.kernel.org/netdev/20240925185216.1990381-1-greearb@candelatech.com/ Reported-by: Ben Greear Signed-off-by: Willem de Bruijn Cc: stable@vger.kernel.org Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/20240929061839.1175300-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 4d8ccaf9a2b4d3..4087f72f0d2be8 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -608,7 +608,9 @@ static void vrf_finish_direct(struct sk_buff *skb) eth_zero_addr(eth->h_dest); eth->h_proto = skb->protocol; + rcu_read_lock_bh(); dev_queue_xmit_nit(skb, vrf_dev); + rcu_read_unlock_bh(); skb_pull(skb, ETH_HLEN); } From 555f45d24ba7cd5527716553031641cdebbe76c7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 29 Sep 2024 15:36:40 +0300 Subject: [PATCH 23/25] bridge: mcast: Fail MDB get request on empty entry When user space deletes a port from an MDB entry, the port is removed synchronously. If this was the last port in the entry and the entry is not joined by the host itself, then the entry is scheduled for deletion via a timer. The above means that it is possible for the MDB get netlink request to retrieve an empty entry which is scheduled for deletion. This is problematic as after deleting the last port in an entry, user space cannot rely on a non-zero return code from the MDB get request as an indication that the port was successfully removed. Fix by returning an error when the entry's port list is empty and the entry is not joined by the host. Fixes: 68b380a395a7 ("bridge: mcast: Add MDB get support") Reported-by: Jamie Bainbridge Closes: https://lore.kernel.org/netdev/c92569919307749f879b9482b0f3e125b7d9d2e3.1726480066.git.jamie.bainbridge@gmail.com/ Tested-by: Jamie Bainbridge Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20240929123640.558525-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index bc37e47ad8299e..1a52a0bca086d6 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -1674,7 +1674,7 @@ int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, spin_lock_bh(&br->multicast_lock); mp = br_mdb_ip_get(br, &group); - if (!mp) { + if (!mp || (!mp->ports && !mp->host_joined)) { NL_SET_ERR_MSG_MOD(extack, "MDB entry not found"); err = -ENOENT; goto unlock; From a1e40ac5b5e9077fe1f7ae0eb88034db0f9ae1ab Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 1 Oct 2024 13:17:46 -0400 Subject: [PATCH 24/25] gso: fix udp gso fraglist segmentation after pull from frag_list Detect gso fraglist skbs with corrupted geometry (see below) and pass these to skb_segment instead of skb_segment_list, as the first can segment them correctly. Valid SKB_GSO_FRAGLIST skbs - consist of two or more segments - the head_skb holds the protocol headers plus first gso_size - one or more frag_list skbs hold exactly one segment - all but the last must be gso_size Optional datapath hooks such as NAT and BPF (bpf_skb_pull_data) can modify these skbs, breaking these invariants. In extreme cases they pull all data into skb linear. For UDP, this causes a NULL ptr deref in __udpv4_gso_segment_list_csum at udp_hdr(seg->next)->dest. Detect invalid geometry due to pull, by checking head_skb size. Don't just drop, as this may blackhole a destination. Convert to be able to pass to regular skb_segment. Link: https://lore.kernel.org/netdev/20240428142913.18666-1-shiming.cheng@mediatek.com/ Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Signed-off-by: Willem de Bruijn Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20241001171752.107580-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index d842303587af93..a5be6e4ed326fb 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -296,8 +296,26 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, return NULL; } - if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) - return __udp_gso_segment_list(gso_skb, features, is_ipv6); + if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) { + /* Detect modified geometry and pass those to skb_segment. */ + if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size) + return __udp_gso_segment_list(gso_skb, features, is_ipv6); + + /* Setup csum, as fraglist skips this in udp4_gro_receive. */ + gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head; + gso_skb->csum_offset = offsetof(struct udphdr, check); + gso_skb->ip_summed = CHECKSUM_PARTIAL; + + uh = udp_hdr(gso_skb); + if (is_ipv6) + uh->check = ~udp_v6_check(gso_skb->len, + &ipv6_hdr(gso_skb)->saddr, + &ipv6_hdr(gso_skb)->daddr, 0); + else + uh->check = ~udp_v4_check(gso_skb->len, + ip_hdr(gso_skb)->saddr, + ip_hdr(gso_skb)->daddr, 0); + } skb_pull(gso_skb, sizeof(*uh)); From fa7dfeae041c91e425db9fbb95fb3f57b821c386 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 26 Sep 2024 12:14:03 +0000 Subject: [PATCH 25/25] net: phy: qt2025: Fix warning: unused import DeviceId Fix the following warning when the driver is compiled as built-in: warning: unused import: `DeviceId` --> drivers/net/phy/qt2025.rs:18:5 | 18 | DeviceId, Driver, | ^^^^^^^^ | = note: `#[warn(unused_imports)]` on by default device_table in module_phy_driver macro is defined only when the driver is built as a module. Use phy::DeviceId in the macro instead of importing `DeviceId` since `phy` is always used. Fixes: fd3eaad826da ("net: phy: add Applied Micro QT2025 PHY driver") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409190717.i135rfVo-lkp@intel.com/ Reviewed-by: Alice Ryhl Reviewed-by: Trevor Gross Signed-off-by: FUJITA Tomonori Reviewed-by: Fiona Behrens Acked-by: Miguel Ojeda Link: https://patch.msgid.link/20240926121404.242092-1-fujita.tomonori@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/qt2025.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/qt2025.rs b/drivers/net/phy/qt2025.rs index 28d8981f410bb5..1ab065798175b4 100644 --- a/drivers/net/phy/qt2025.rs +++ b/drivers/net/phy/qt2025.rs @@ -15,7 +15,7 @@ use kernel::firmware::Firmware; use kernel::net::phy::{ self, reg::{Mmd, C45}, - DeviceId, Driver, + Driver, }; use kernel::prelude::*; use kernel::sizes::{SZ_16K, SZ_8K}; @@ -23,7 +23,7 @@ use kernel::sizes::{SZ_16K, SZ_8K}; kernel::module_phy_driver! { drivers: [PhyQT2025], device_table: [ - DeviceId::new_with_driver::(), + phy::DeviceId::new_with_driver::(), ], name: "qt2025_phy", author: "FUJITA Tomonori ",