diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 70c4fb9d4e5ce0..a0e0fab8161ad5 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -176,3 +176,5 @@ netdevice_tracker dev_registered_tracker struct_rtnl_hw_stats64* offload_xstats_l3 struct_devlink_port* devlink_port struct_dpll_pin* dpll_pin +struct hlist_head page_pools +struct dim_irq_moder* irq_moder diff --git a/MAINTAINERS b/MAINTAINERS index 7780f04e0e9041..4a556a1ddc6ed4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1609,6 +1609,14 @@ F: Documentation/admin-guide/perf/xgene-pmu.rst F: Documentation/devicetree/bindings/perf/apm-xgene-pmu.txt F: drivers/perf/xgene_pmu.c +APPLIED MICRO QT2025 PHY DRIVER +M: FUJITA Tomonori +R: Trevor Gross +L: netdev@vger.kernel.org +L: rust-for-linux@vger.kernel.org +S: Maintained +F: drivers/net/phy/qt2025.rs + APTINA CAMERA SENSOR PLL M: Laurent Pinchart L: linux-media@vger.kernel.org @@ -8357,6 +8365,7 @@ L: netdev@vger.kernel.org L: rust-for-linux@vger.kernel.org S: Maintained F: rust/kernel/net/phy.rs +F: rust/kernel/net/phy/reg.rs EXEC & BINFMT API, ELF R: Eric Biederman diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index ed2160cc5acb40..6ea2f3071fe8f7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1856,8 +1856,9 @@ struct cpt_flt_eng_info_req { struct cpt_flt_eng_info_rsp { struct mbox_msghdr hdr; - u64 flt_eng_map[CPT_10K_AF_INT_VEC_RVU]; - u64 rcvrd_eng_map[CPT_10K_AF_INT_VEC_RVU]; +#define CPT_AF_MAX_FLT_INT_VECS 3 + u64 flt_eng_map[CPT_AF_MAX_FLT_INT_VECS]; + u64 rcvrd_eng_map[CPT_AF_MAX_FLT_INT_VECS]; u64 rsvd; }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 03ee93fd9e9498..43b1d83686d164 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -400,6 +400,7 @@ struct hw_cap { bool nix_multiple_dwrr_mtu; /* Multiple DWRR_MTU to choose from */ bool npc_hash_extract; /* Hash extract enabled ? */ bool npc_exact_match_enabled; /* Exact match supported ? 
*/ + bool cpt_rxc; /* Is CPT-RXC supported */ }; struct rvu_hwinfo { @@ -690,6 +691,35 @@ static inline bool is_cnf10ka_a0(struct rvu *rvu) return false; } +static inline bool is_cn10ka_a0(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A && + (pdev->revision & 0x0F) == 0x0) + return true; + return false; +} + +static inline bool is_cn10ka_a1(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A && + (pdev->revision & 0x0F) == 0x1) + return true; + return false; +} + +static inline bool is_cn10kb(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_B) + return true; + return false; +} + static inline bool is_rvu_npc_hash_extract_en(struct rvu *rvu) { u64 npc_const3; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index daf4b951e90591..3c5bbaf12e594c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -19,6 +19,12 @@ /* Length of initial context fetch in 128 byte words */ #define CPT_CTX_ILEN 1ULL +/* Interrupt vector count of CPT RVU and RAS interrupts */ +#define CPT_10K_AF_RVU_RAS_INT_VEC_CNT 2 + +/* Default CPT_AF_RXC_CFG1:max_rxc_icb_cnt */ +#define CPT_DFLT_MAX_RXC_ICB_CNT 0xC0ULL + #define cpt_get_eng_sts(e_min, e_max, rsp, etype) \ ({ \ u64 free_sts = 0, busy_sts = 0; \ @@ -37,6 +43,41 @@ (_rsp)->free_sts_##etype = free_sts; \ }) +#define MAX_AE GENMASK_ULL(47, 32) +#define MAX_IE GENMASK_ULL(31, 16) +#define MAX_SE GENMASK_ULL(15, 0) + +static u16 cpt_max_engines_get(struct rvu *rvu) +{ + u16 max_ses, max_ies, max_aes; + u64 reg; + + reg = rvu_read64(rvu, BLKADDR_CPT0, CPT_AF_CONSTANTS1); + max_ses = FIELD_GET(MAX_SE, reg); + max_ies = FIELD_GET(MAX_IE, reg); + max_aes = FIELD_GET(MAX_AE, reg); + + return max_ses + max_ies + max_aes; +} + +/* Number of flt interrupt vectors are depends on number of engines that the + * chip has. Each flt vector represents 64 engines. + */ +static int cpt_10k_flt_nvecs_get(struct rvu *rvu, u16 max_engs) +{ + int flt_vecs; + + flt_vecs = DIV_ROUND_UP(max_engs, 64); + + if (flt_vecs > CPT_10K_AF_INT_VEC_FLT_MAX) { + dev_warn_once(rvu->dev, "flt_vecs:%d exceeds the max vectors:%d\n", + flt_vecs, CPT_10K_AF_INT_VEC_FLT_MAX); + flt_vecs = CPT_10K_AF_INT_VEC_FLT_MAX; + } + + return flt_vecs; +} + static irqreturn_t cpt_af_flt_intr_handler(int vec, void *ptr) { struct rvu_block *block = ptr; @@ -150,17 +191,26 @@ static void cpt_10k_unregister_interrupts(struct rvu_block *block, int off) { struct rvu *rvu = block->rvu; int blkaddr = block->addr; - int i; + int i, flt_vecs; + u16 max_engs; + u8 nr; + + max_engs = cpt_max_engines_get(rvu); + flt_vecs = cpt_10k_flt_nvecs_get(rvu, max_engs); /* Disable all CPT AF interrupts */ - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(0), ~0ULL); - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(1), ~0ULL); - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(2), 0xFFFF); + for (i = CPT_10K_AF_INT_VEC_FLT0; i < flt_vecs; i++) { + nr = (max_engs > 64) ? 64 : max_engs; + max_engs -= nr; + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), + INTR_MASK(nr)); + } rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1); rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1); - for (i = 0; i < CPT_10K_AF_INT_VEC_CNT; i++) + /* CPT AF interrupt vectors are flt_int, rvu_int and ras_int. 
*/ + for (i = 0; i < flt_vecs + CPT_10K_AF_RVU_RAS_INT_VEC_CNT; i++) if (rvu->irq_allocated[off + i]) { free_irq(pci_irq_vector(rvu->pdev, off + i), block); rvu->irq_allocated[off + i] = false; @@ -206,12 +256,18 @@ void rvu_cpt_unregister_interrupts(struct rvu *rvu) static int cpt_10k_register_interrupts(struct rvu_block *block, int off) { + int rvu_intr_vec, ras_intr_vec; struct rvu *rvu = block->rvu; int blkaddr = block->addr; irq_handler_t flt_fn; - int i, ret; + int i, ret, flt_vecs; + u16 max_engs; + u8 nr; - for (i = CPT_10K_AF_INT_VEC_FLT0; i < CPT_10K_AF_INT_VEC_RVU; i++) { + max_engs = cpt_max_engines_get(rvu); + flt_vecs = cpt_10k_flt_nvecs_get(rvu, max_engs); + + for (i = CPT_10K_AF_INT_VEC_FLT0; i < flt_vecs; i++) { sprintf(&rvu->irq_name[(off + i) * NAME_SIZE], "CPTAF FLT%d", i); switch (i) { @@ -229,20 +285,24 @@ static int cpt_10k_register_interrupts(struct rvu_block *block, int off) flt_fn, &rvu->irq_name[(off + i) * NAME_SIZE]); if (ret) goto err; - if (i == CPT_10K_AF_INT_VEC_FLT2) - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0xFFFF); - else - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), ~0ULL); + + nr = (max_engs > 64) ? 64 : max_engs; + max_engs -= nr; + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), + INTR_MASK(nr)); } - ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RVU, + rvu_intr_vec = flt_vecs; + ras_intr_vec = rvu_intr_vec + 1; + + ret = rvu_cpt_do_register_interrupt(block, off + rvu_intr_vec, rvu_cpt_af_rvu_intr_handler, "CPTAF RVU"); if (ret) goto err; rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1S, 0x1); - ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RAS, + ret = rvu_cpt_do_register_interrupt(block, off + ras_intr_vec, rvu_cpt_af_ras_intr_handler, "CPTAF RAS"); if (ret) @@ -680,6 +740,7 @@ static bool validate_and_update_reg_offset(struct rvu *rvu, case CPT_AF_BLK_RST: case CPT_AF_CONSTANTS1: case CPT_AF_CTX_FLUSH_TIMER: + case CPT_AF_RXC_CFG1: return true; } @@ -732,6 +793,8 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, static void get_ctx_pc(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr) { + struct rvu_hwinfo *hw = rvu->hw; + if (is_rvu_otx2(rvu)) return; @@ -755,14 +818,16 @@ static void get_ctx_pc(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr) rsp->ctx_err = rvu_read64(rvu, blkaddr, CPT_AF_CTX_ERR); rsp->ctx_enc_id = rvu_read64(rvu, blkaddr, CPT_AF_CTX_ENC_ID); rsp->ctx_flush_timer = rvu_read64(rvu, blkaddr, CPT_AF_CTX_FLUSH_TIMER); + rsp->x2p_link_cfg0 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(0)); + rsp->x2p_link_cfg1 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(1)); + if (!hw->cap.cpt_rxc) + return; rsp->rxc_time = rvu_read64(rvu, blkaddr, CPT_AF_RXC_TIME); rsp->rxc_time_cfg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_TIME_CFG); rsp->rxc_active_sts = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ACTIVE_STS); rsp->rxc_zombie_sts = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ZOMBIE_STS); rsp->rxc_dfrg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_DFRG); - rsp->x2p_link_cfg0 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(0)); - rsp->x2p_link_cfg1 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(1)); } static void get_eng_sts(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr) @@ -921,13 +986,17 @@ int rvu_mbox_handler_cpt_flt_eng_info(struct rvu *rvu, struct cpt_flt_eng_info_r struct rvu_block *block; unsigned long flags; int blkaddr, vec; + int flt_vecs; + u16 max_engs; blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); if (blkaddr < 0) return blkaddr; block = 
&rvu->hw->block[blkaddr]; - for (vec = 0; vec < CPT_10K_AF_INT_VEC_RVU; vec++) { + max_engs = cpt_max_engines_get(rvu); + flt_vecs = cpt_10k_flt_nvecs_get(rvu, max_engs); + for (vec = 0; vec < flt_vecs; vec++) { spin_lock_irqsave(&rvu->cpt_intr_lock, flags); rsp->flt_eng_map[vec] = block->cpt_flt_eng_map[vec]; rsp->rcvrd_eng_map[vec] = block->cpt_rcvrd_eng_map[vec]; @@ -943,10 +1012,11 @@ int rvu_mbox_handler_cpt_flt_eng_info(struct rvu *rvu, struct cpt_flt_eng_info_r static void cpt_rxc_teardown(struct rvu *rvu, int blkaddr) { struct cpt_rxc_time_cfg_req req, prev; + struct rvu_hwinfo *hw = rvu->hw; int timeout = 2000; u64 reg; - if (is_rvu_otx2(rvu)) + if (!hw->cap.cpt_rxc) return; /* Set time limit to minimum values, so that rxc entries will be @@ -1219,10 +1289,30 @@ int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc) return 0; } +#define MAX_RXC_ICB_CNT GENMASK_ULL(40, 32) + int rvu_cpt_init(struct rvu *rvu) { + struct rvu_hwinfo *hw = rvu->hw; + u64 reg_val; + /* Retrieve CPT PF number */ rvu->cpt_pf_num = get_cpt_pf_num(rvu); + if (is_block_implemented(rvu->hw, BLKADDR_CPT0) && !is_rvu_otx2(rvu) && + !is_cn10kb(rvu)) + hw->cap.cpt_rxc = true; + + if (hw->cap.cpt_rxc && !is_cn10ka_a0(rvu) && !is_cn10ka_a1(rvu)) { + /* Set CPT_AF_RXC_CFG1:max_rxc_icb_cnt to 0xc0 to not effect + * inline inbound peak performance + */ + reg_val = rvu_read64(rvu, BLKADDR_CPT0, CPT_AF_RXC_CFG1); + reg_val &= ~MAX_RXC_ICB_CNT; + reg_val |= FIELD_PREP(MAX_RXC_ICB_CNT, + CPT_DFLT_MAX_RXC_ICB_CNT); + rvu_write64(rvu, BLKADDR_CPT0, CPT_AF_RXC_CFG1, reg_val); + } + spin_lock_init(&rvu->cpt_intr_lock); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index d56be5fb7eb4a2..2b299fa8515913 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -545,6 +545,7 @@ #define CPT_AF_CTX_PSH_PC (0x49450ull) #define CPT_AF_CTX_PSH_LATENCY_PC (0x49458ull) #define CPT_AF_CTX_CAM_DATA(a) (0x49800ull | (u64)(a) << 3) +#define CPT_AF_RXC_CFG1 (0x50000ull) #define CPT_AF_RXC_TIME (0x50010ull) #define CPT_AF_RXC_TIME_CFG (0x50018ull) #define CPT_AF_RXC_DFRG (0x50020ull) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index 5ef406c7e8a44a..fc8da209065705 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -71,13 +71,11 @@ enum cpt_af_int_vec_e { CPT_AF_INT_VEC_CNT = 0x4, }; -enum cpt_10k_af_int_vec_e { +enum cpt_cn10k_flt_int_vec_e { CPT_10K_AF_INT_VEC_FLT0 = 0x0, CPT_10K_AF_INT_VEC_FLT1 = 0x1, CPT_10K_AF_INT_VEC_FLT2 = 0x2, - CPT_10K_AF_INT_VEC_RVU = 0x3, - CPT_10K_AF_INT_VEC_RAS = 0x4, - CPT_10K_AF_INT_VEC_CNT = 0x5, + CPT_10K_AF_INT_VEC_FLT_MAX = 0x3, }; /* NPA Admin function Interrupt Vector Enumeration */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 7136bc48530ba0..df0234a338a8b1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -278,7 +278,7 @@ struct nfp_nsp *nfp_nsp_open(struct nfp_cpp *cpp) res = nfp_resource_acquire(cpp, NFP_RESOURCE_NSP); if (IS_ERR(res)) - return (void *)res; + return ERR_CAST(res); state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) { diff --git a/drivers/net/ethernet/sfc/tc_counters.c b/drivers/net/ethernet/sfc/tc_counters.c index 
c4408842432363..a421b012350694 100644 --- a/drivers/net/ethernet/sfc/tc_counters.c +++ b/drivers/net/ethernet/sfc/tc_counters.c @@ -249,7 +249,7 @@ struct efx_tc_counter_index *efx_tc_flower_get_counter_index( &ctr->linkage, efx_tc_counter_id_ht_params); kfree(ctr); - return (void *)cnt; /* it's an ERR_PTR */ + return ERR_CAST(cnt); } ctr->cnt = cnt; refcount_set(&ctr->ref, 1); diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index fef4eff7753a7a..b1afcb8740de12 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -2,6 +2,8 @@ /* Copyright (c) 2014 Mahesh Bandewar */ +#include + #include "ipvlan.h" static u32 ipvlan_jhash_secret __read_mostly; @@ -420,7 +422,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) int err, ret = NET_XMIT_DROP; struct flowi4 fl4 = { .flowi4_oif = dev->ifindex, - .flowi4_tos = RT_TOS(ip4h->tos), + .flowi4_tos = ip4h->tos & INET_DSCP_MASK, .flowi4_flags = FLOWI_FLAG_ANYSRC, .flowi4_mark = skb->mark, .daddr = ip4h->daddr, diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index f530fcd092fe4f..01b235b3bb7e80 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -112,6 +112,13 @@ config ADIN1100_PHY Currently supports the: - ADIN1100 - Robust,Industrial, Low Power 10BASE-T1L Ethernet PHY +config AMCC_QT2025_PHY + tristate "AMCC QT2025 PHY" + depends on RUST_PHYLIB_ABSTRACTIONS + depends on RUST_FW_LOADER_ABSTRACTIONS + help + Adds support for the Applied Micro Circuits Corporation QT2025 PHY. + source "drivers/net/phy/aquantia/Kconfig" config AX88796B_PHY diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 33adb3df5f64f1..90f886844381d0 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_ADIN_PHY) += adin.o obj-$(CONFIG_ADIN1100_PHY) += adin1100.o obj-$(CONFIG_AIR_EN8811H_PHY) += air_en8811h.o obj-$(CONFIG_AMD_PHY) += amd.o +obj-$(CONFIG_AMCC_QT2025_PHY) += qt2025.o obj-$(CONFIG_AQUANTIA_PHY) += aquantia/ ifdef CONFIG_AX88796B_RUST_PHY obj-$(CONFIG_AX88796B_PHY) += ax88796b_rust.o diff --git a/drivers/net/phy/ax88796b_rust.rs b/drivers/net/phy/ax88796b_rust.rs index 5c92572962dce7..8c7eb009d9fc0f 100644 --- a/drivers/net/phy/ax88796b_rust.rs +++ b/drivers/net/phy/ax88796b_rust.rs @@ -6,7 +6,7 @@ //! C version of this driver: [`drivers/net/phy/ax88796b.c`](./ax88796b.c) use kernel::{ c_str, - net::phy::{self, DeviceId, Driver}, + net::phy::{self, reg::C22, DeviceId, Driver}, prelude::*, uapi, }; @@ -24,7 +24,6 @@ kernel::module_phy_driver! { license: "GPL", } -const MII_BMCR: u16 = uapi::MII_BMCR as u16; const BMCR_SPEED100: u16 = uapi::BMCR_SPEED100 as u16; const BMCR_FULLDPLX: u16 = uapi::BMCR_FULLDPLX as u16; @@ -33,7 +32,7 @@ const BMCR_FULLDPLX: u16 = uapi::BMCR_FULLDPLX as u16; // Toggle BMCR_RESET bit off to accommodate broken AX8796B PHY implementation // such as used on the Individual Computers' X-Surf 100 Zorro card. fn asix_soft_reset(dev: &mut phy::Device) -> Result { - dev.write(uapi::MII_BMCR as u16, 0)?; + dev.write(C22::BMCR, 0)?; dev.genphy_soft_reset() } @@ -55,7 +54,7 @@ impl Driver for PhyAX88772A { } // If MII_LPA is 0, phy_resolve_aneg_linkmode() will fail to resolve // linkmode so use MII_BMCR as default values. 
- let ret = dev.read(MII_BMCR)?; + let ret = dev.read(C22::BMCR)?; if ret & BMCR_SPEED100 != 0 { dev.set_speed(uapi::SPEED_100); diff --git a/drivers/net/phy/qt2025.rs b/drivers/net/phy/qt2025.rs new file mode 100644 index 00000000000000..28d8981f410bb5 --- /dev/null +++ b/drivers/net/phy/qt2025.rs @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) Tehuti Networks Ltd. +// Copyright (C) 2024 FUJITA Tomonori + +//! Applied Micro Circuits Corporation QT2025 PHY driver +//! +//! This driver is based on the vendor driver `QT2025_phy.c`. This source +//! and firmware can be downloaded on the EN-9320SFP+ support site. +//! +//! The QT2025 PHY integrates an Intel 8051 micro-controller. + +use kernel::c_str; +use kernel::error::code; +use kernel::firmware::Firmware; +use kernel::net::phy::{ + self, + reg::{Mmd, C45}, + DeviceId, Driver, +}; +use kernel::prelude::*; +use kernel::sizes::{SZ_16K, SZ_8K}; + +kernel::module_phy_driver! { + drivers: [PhyQT2025], + device_table: [ + DeviceId::new_with_driver::(), + ], + name: "qt2025_phy", + author: "FUJITA Tomonori ", + description: "AMCC QT2025 PHY driver", + license: "GPL", + firmware: ["qt2025-2.0.3.3.fw"], +} + +struct PhyQT2025; + +#[vtable] +impl Driver for PhyQT2025 { + const NAME: &'static CStr = c_str!("QT2025 10Gpbs SFP+"); + const PHY_DEVICE_ID: phy::DeviceId = phy::DeviceId::new_with_exact_mask(0x0043a400); + + fn probe(dev: &mut phy::Device) -> Result<()> { + // Check the hardware revision code. + // Only 0x3b works with this driver and firmware. + let hw_rev = dev.read(C45::new(Mmd::PMAPMD, 0xd001))?; + if (hw_rev >> 8) != 0xb3 { + return Err(code::ENODEV); + } + + // `MICRO_RESETN`: hold the micro-controller in reset while configuring. + dev.write(C45::new(Mmd::PMAPMD, 0xc300), 0x0000)?; + // `SREFCLK_FREQ`: configure clock frequency of the micro-controller. + dev.write(C45::new(Mmd::PMAPMD, 0xc302), 0x0004)?; + // Non loopback mode. + dev.write(C45::new(Mmd::PMAPMD, 0xc319), 0x0038)?; + // `CUS_LAN_WAN_CONFIG`: select between LAN and WAN (WIS) mode. + dev.write(C45::new(Mmd::PMAPMD, 0xc31a), 0x0098)?; + // The following writes use standardized registers (3.38 through + // 3.41 5/10/25GBASE-R PCS test pattern seed B) for something else. + // We don't know what. + dev.write(C45::new(Mmd::PCS, 0x0026), 0x0e00)?; + dev.write(C45::new(Mmd::PCS, 0x0027), 0x0893)?; + dev.write(C45::new(Mmd::PCS, 0x0028), 0xa528)?; + dev.write(C45::new(Mmd::PCS, 0x0029), 0x0003)?; + // Configure transmit and recovered clock. + dev.write(C45::new(Mmd::PMAPMD, 0xa30a), 0x06e1)?; + // `MICRO_RESETN`: release the micro-controller from the reset state. + dev.write(C45::new(Mmd::PMAPMD, 0xc300), 0x0002)?; + // The micro-controller will start running from the boot ROM. + dev.write(C45::new(Mmd::PCS, 0xe854), 0x00c0)?; + + let fw = Firmware::request(c_str!("qt2025-2.0.3.3.fw"), dev.as_ref())?; + if fw.data().len() > SZ_16K + SZ_8K { + return Err(code::EFBIG); + } + + // The 24kB of program memory space is accessible by MDIO. + // The first 16kB of memory is located in the address range 3.8000h - 3.BFFFh. + // The next 8kB of memory is located at 4.8000h - 4.9FFFh. + let mut dst_offset = 0; + let mut dst_mmd = Mmd::PCS; + for (src_idx, val) in fw.data().iter().enumerate() { + if src_idx == SZ_16K { + // Start writing to the next register with no offset + dst_offset = 0; + dst_mmd = Mmd::PHYXS; + } + + dev.write(C45::new(dst_mmd, 0x8000 + dst_offset), (*val).into())?; + + dst_offset += 1; + } + // The micro-controller will start running from SRAM. 
+ dev.write(C45::new(Mmd::PCS, 0xe854), 0x0040)?; + + // TODO: sleep here until the hw becomes ready. + Ok(()) + } + + fn read_status(dev: &mut phy::Device) -> Result { + dev.genphy_read_status::() + } +} diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 040f0bb36c0ea4..a900908eb24a6f 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -37,6 +37,7 @@ #include #include #include +#include #define DRV_NAME "vrf" #define DRV_VERSION "1.1" @@ -520,7 +521,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, /* needed to match OIF rule */ fl4.flowi4_l3mdev = vrf_dev->ifindex; fl4.flowi4_iif = LOOPBACK_IFINDEX; - fl4.flowi4_tos = RT_TOS(ip4h->tos); + fl4.flowi4_tos = ip4h->tos & INET_DSCP_MASK; fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl4.flowi4_proto = ip4h->protocol; fl4.daddr = ip4h->daddr; diff --git a/include/net/ip.h b/include/net/ip.h index c5606cadb1a552..d92d3bc3ec0e25 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -33,6 +33,7 @@ #include #include #include +#include #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ @@ -258,7 +259,9 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet, static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) { - return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(READ_ONCE(inet->tos)); + u8 dsfield = ipc->tos != -1 ? ipc->tos : READ_ONCE(inet->tos); + + return dsfield & INET_DSCP_MASK; } /* datagram.c */ @@ -794,9 +797,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0); } -bool icmp_global_allow(void); -extern int sysctl_icmp_msgs_per_sec; -extern int sysctl_icmp_msgs_burst; +bool icmp_global_allow(struct net *net); +void icmp_global_consume(struct net *net); #ifdef CONFIG_PROC_FS int ip_misc_proc_init(void); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 5fcd61ada62289..276f622f351687 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -122,7 +122,10 @@ struct netns_ipv4 { u8 sysctl_icmp_errors_use_inbound_ifaddr; int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; - + int sysctl_icmp_msgs_per_sec; + int sysctl_icmp_msgs_burst; + atomic_t icmp_global_credit; + u32 icmp_global_stamp; u32 ip_rt_min_pmtu; int ip_rt_mtu_expires; int ip_rt_min_advmss; diff --git a/include/net/route.h b/include/net/route.h index 93833cfe9c9681..1789f1e6640b46 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -45,7 +46,7 @@ static inline __u8 ip_sock_rt_scope(const struct sock *sk) static inline __u8 ip_sock_rt_tos(const struct sock *sk) { - return RT_TOS(READ_ONCE(inet_sk(sk)->tos)); + return READ_ONCE(inet_sk(sk)->tos) & INET_DSCP_MASK; } struct ip_tunnel_info; @@ -265,8 +266,6 @@ static inline void ip_rt_put(struct rtable *rt) dst_release(&rt->dst); } -#define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3) - extern const __u8 ip_tos2prio[16]; static inline char rt_tos2priority(u8 tos) diff --git a/net/core/filter.c b/net/core/filter.c index f09d875cc05318..8569cd2482eebf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2372,7 +2372,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, struct flowi4 fl4 = { .flowi4_flags = FLOWI_FLAG_ANYSRC, .flowi4_mark = skb->mark, - .flowi4_tos = RT_TOS(ip4h->tos), + .flowi4_tos = ip4h->tos & INET_DSCP_MASK, .flowi4_oif = dev->ifindex, .flowi4_proto = ip4h->protocol, .daddr = ip4h->daddr, 
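The ax88796b_rust.rs hunk above moves the driver from raw u16 register numbers to the typed `reg::C22` constants, and the new qt2025.rs driver reaches its MMDs through `reg::C45`; both namespaces are accessed through the same `Device::read` and `Device::write` calls. A minimal sketch of that pattern follows, assuming a hypothetical `example_access` helper; the vendor register offset and the written value are illustrative only, not part of this series.

    use kernel::net::phy::{
        self,
        reg::{C22, C45, Mmd},
    };
    use kernel::prelude::*;

    // Illustrative helper, not part of this series: it only shows how the two
    // register namespaces share one typed read/write surface.
    fn example_access(dev: &mut phy::Device) -> Result<()> {
        // Clause 22: the 5-bit register number is carried by the `C22` value.
        let _bmcr = dev.read(C22::BMCR)?;

        // Clause 45: the MMD and the 16-bit register address travel together,
        // as in the qt2025.rs probe sequence above.
        let _pma_ctrl1 = dev.read(C45::new(Mmd::PMAPMD, 0))?;

        // Vendor-defined clause 22 registers (16..=31) get a compile-time
        // checked constructor; the offset and value here are assumptions.
        dev.write(C22::vendor_specific::<0x10>(), 0)?;
        Ok(())
    }

The same scheme is what lets qt2025.rs stream its firmware image through the C45 PCS and PHYXS windows without any raw MDIO address arithmetic.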
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 507cb21d6bf0c5..a0705edca22a1a 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -222,13 +222,10 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = { }; static int -ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info) +ethnl_set_pse_validate(struct phy_device *phydev, struct genl_info *info) { struct nlattr **tb = info->attrs; - struct phy_device *phydev; - phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER], - info->extack); if (IS_ERR_OR_NULL(phydev)) { NL_SET_ERR_MSG(info->extack, "No PHY is attached"); return -EOPNOTSUPP; @@ -254,7 +251,7 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info) return -EOPNOTSUPP; } - return 1; + return 0; } static int @@ -262,12 +259,13 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info) { struct nlattr **tb = info->attrs; struct phy_device *phydev; - int ret = 0; + int ret; phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER], info->extack); - if (IS_ERR_OR_NULL(phydev)) - return -ENODEV; + ret = ethnl_set_pse_validate(phydev, info); + if (ret) + return ret; if (tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]) { unsigned int pw_limit; @@ -314,7 +312,6 @@ const struct ethnl_request_ops ethnl_pse_request_ops = { .fill_reply = pse_fill_reply, .cleanup_data = pse_cleanup_data, - .set_validate = ethnl_set_pse_validate, .set = ethnl_set_pse, /* PSE has no notification */ }; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index b8f56d03fcbb62..d2463b6e390eb9 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -93,6 +93,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -220,61 +221,56 @@ static inline void icmp_xmit_unlock(struct sock *sk) spin_unlock(&sk->sk_lock.slock); } -int sysctl_icmp_msgs_per_sec __read_mostly = 1000; -int sysctl_icmp_msgs_burst __read_mostly = 50; - -static struct { - spinlock_t lock; - u32 credit; - u32 stamp; -} icmp_global = { - .lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock), -}; - /** * icmp_global_allow - Are we allowed to send one more ICMP message ? + * @net: network namespace * * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec. * Returns false if we reached the limit and can not send another packet. - * Note: called with BH disabled + * Works in tandem with icmp_global_consume(). */ -bool icmp_global_allow(void) +bool icmp_global_allow(struct net *net) { - u32 credit, delta, incr = 0, now = (u32)jiffies; - bool rc = false; + u32 delta, now, oldstamp; + int incr, new, old; - /* Check if token bucket is empty and cannot be refilled - * without taking the spinlock. The READ_ONCE() are paired - * with the following WRITE_ONCE() in this same function. + /* Note: many cpus could find this condition true. + * Then later icmp_global_consume() could consume more credits, + * this is an acceptable race. 
*/ - if (!READ_ONCE(icmp_global.credit)) { - delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ); - if (delta < HZ / 50) - return false; - } + if (atomic_read(&net->ipv4.icmp_global_credit) > 0) + return true; - spin_lock(&icmp_global.lock); - delta = min_t(u32, now - icmp_global.stamp, HZ); - if (delta >= HZ / 50) { - incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; - if (incr) - WRITE_ONCE(icmp_global.stamp, now); - } - credit = min_t(u32, icmp_global.credit + incr, - READ_ONCE(sysctl_icmp_msgs_burst)); - if (credit) { - /* We want to use a credit of one in average, but need to randomize - * it for security reasons. - */ - credit = max_t(int, credit - get_random_u32_below(3), 0); - rc = true; + now = jiffies; + oldstamp = READ_ONCE(net->ipv4.icmp_global_stamp); + delta = min_t(u32, now - oldstamp, HZ); + if (delta < HZ / 50) + return false; + + incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ; + if (!incr) + return false; + + if (cmpxchg(&net->ipv4.icmp_global_stamp, oldstamp, now) == oldstamp) { + old = atomic_read(&net->ipv4.icmp_global_credit); + do { + new = min(old + incr, READ_ONCE(net->ipv4.sysctl_icmp_msgs_burst)); + } while (!atomic_try_cmpxchg(&net->ipv4.icmp_global_credit, &old, new)); } - WRITE_ONCE(icmp_global.credit, credit); - spin_unlock(&icmp_global.lock); - return rc; + return true; } EXPORT_SYMBOL(icmp_global_allow); +void icmp_global_consume(struct net *net) +{ + int credits = get_random_u32_below(3); + + /* Note: this might make icmp_global.credit negative. */ + if (credits) + atomic_sub(credits, &net->ipv4.icmp_global_credit); +} +EXPORT_SYMBOL(icmp_global_consume); + static bool icmpv4_mask_allow(struct net *net, int type, int code) { if (type > NR_ICMP_TYPES) @@ -291,14 +287,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) return false; } -static bool icmpv4_global_allow(struct net *net, int type, int code) +static bool icmpv4_global_allow(struct net *net, int type, int code, + bool *apply_ratelimit) { if (icmpv4_mask_allow(net, type, code)) return true; - if (icmp_global_allow()) + if (icmp_global_allow(net)) { + *apply_ratelimit = true; return true; - + } __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL); return false; } @@ -308,15 +306,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code) */ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, - struct flowi4 *fl4, int type, int code) + struct flowi4 *fl4, int type, int code, + bool apply_ratelimit) { struct dst_entry *dst = &rt->dst; struct inet_peer *peer; bool rc = true; int vif; - if (icmpv4_mask_allow(net, type, code)) - goto out; + if (!apply_ratelimit) + return true; /* No rate limit on loopback */ if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) @@ -331,6 +330,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, out: if (!rc) __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); + else + icmp_global_consume(net); return rc; } @@ -402,6 +403,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct ipcm_cookie ipc; struct rtable *rt = skb_rtable(skb); struct net *net = dev_net(rt->dst.dev); + bool apply_ratelimit = false; struct flowi4 fl4; struct sock *sk; struct inet_sock *inet; @@ -413,11 +415,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb)) return; - /* Needed by both icmp_global_allow and icmp_xmit_lock */ + /* Needed by both icmpv4_global_allow and icmp_xmit_lock */ 
local_bh_disable(); - /* global icmp_msgs_per_sec */ - if (!icmpv4_global_allow(net, type, code)) + /* is global icmp_msgs_per_sec exhausted ? */ + if (!icmpv4_global_allow(net, type, code, &apply_ratelimit)) goto out_bh_enable; sk = icmp_xmit_lock(net); @@ -450,7 +452,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) goto out_unlock; - if (icmpv4_xrlim_allow(net, rt, &fl4, type, code)) + if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit)) icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt); ip_rt_put(rt); out_unlock: @@ -496,7 +498,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->saddr = saddr; fl4->flowi4_mark = mark; fl4->flowi4_uid = sock_net_uid(net, NULL); - fl4->flowi4_tos = RT_TOS(tos); + fl4->flowi4_tos = tos & INET_DSCP_MASK; fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; @@ -596,6 +598,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, int room; struct icmp_bxm icmp_param; struct rtable *rt = skb_rtable(skb_in); + bool apply_ratelimit = false; struct ipcm_cookie ipc; struct flowi4 fl4; __be32 saddr; @@ -677,7 +680,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, } } - /* Needed by both icmp_global_allow and icmp_xmit_lock */ + /* Needed by both icmpv4_global_allow and icmp_xmit_lock */ local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit, unless @@ -685,7 +688,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow) */ if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) && - !icmpv4_global_allow(net, type, code)) + !icmpv4_global_allow(net, type, code, &apply_ratelimit)) goto out_bh_enable; sk = icmp_xmit_lock(net); @@ -744,7 +747,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, goto out_unlock; /* peer icmp_ratelimit */ - if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code)) + if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit)) goto ende; /* RFC says return as much as we can without exceeding 576 bytes. 
*/ @@ -1487,6 +1490,8 @@ static int __net_init icmp_sk_init(struct net *net) net->ipv4.sysctl_icmp_ratelimit = 1 * HZ; net->ipv4.sysctl_icmp_ratemask = 0x1818; net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0; + net->ipv4.sysctl_icmp_msgs_per_sec = 1000; + net->ipv4.sysctl_icmp_msgs_burst = 50; return 0; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b90d0f78ac8080..eea443b7f65e20 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include #include @@ -1621,7 +1622,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, flowi4_init_output(&fl4, oif, IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark, - RT_TOS(arg->tos), + arg->tos & INET_DSCP_MASK, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6972b24664a00..723ac9181558c3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -512,7 +512,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4, sk->sk_protocol; } - flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope, + flowi4_init_output(fl4, oif, mark, tos & INET_DSCP_MASK, scope, prot, flow_flags, iph->daddr, iph->saddr, 0, 0, sock_net_uid(net, sk)); } @@ -541,7 +541,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), - ip_sock_rt_tos(sk) & IPTOS_RT_MASK, + ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), inet_test_bit(HDRINCL, sk) ? IPPROTO_RAW : sk->sk_protocol, @@ -2618,7 +2618,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, struct rtable *rth; fl4->flowi4_iif = LOOPBACK_IFINDEX; - fl4->flowi4_tos &= IPTOS_RT_MASK; + fl4->flowi4_tos &= INET_DSCP_MASK; rcu_read_lock(); rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); @@ -3261,7 +3261,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.daddr = dst; fl4.saddr = src; - fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK; + fl4.flowi4_tos = rtm->rtm_tos & INET_DSCP_MASK; fl4.flowi4_oif = tb[RTA_OIF] ? 
nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4af0c234d8d763..a79b2a52ce01e6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -600,22 +600,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0444, .proc_handler = proc_tcp_available_ulp, }, - { - .procname = "icmp_msgs_per_sec", - .data = &sysctl_icmp_msgs_per_sec, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - }, - { - .procname = "icmp_msgs_burst", - .data = &sysctl_icmp_msgs_burst, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - }, { .procname = "udp_mem", .data = &sysctl_udp_mem, @@ -701,6 +685,22 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "icmp_msgs_per_sec", + .data = &init_net.ipv4.sysctl_icmp_msgs_per_sec, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, + { + .procname = "icmp_msgs_burst", + .data = &init_net.ipv4.sysctl_icmp_msgs_burst, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, { .procname = "ping_group_range", .data = &init_net.ipv4.ping_group_range.range, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 7b31674644efc3..071b0bc1179d81 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -175,14 +175,16 @@ static bool icmpv6_mask_allow(struct net *net, int type) return false; } -static bool icmpv6_global_allow(struct net *net, int type) +static bool icmpv6_global_allow(struct net *net, int type, + bool *apply_ratelimit) { if (icmpv6_mask_allow(net, type)) return true; - if (icmp_global_allow()) + if (icmp_global_allow(net)) { + *apply_ratelimit = true; return true; - + } __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL); return false; } @@ -191,13 +193,13 @@ static bool icmpv6_global_allow(struct net *net, int type) * Check the ICMP output rate limit */ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, - struct flowi6 *fl6) + struct flowi6 *fl6, bool apply_ratelimit) { struct net *net = sock_net(sk); struct dst_entry *dst; bool res = false; - if (icmpv6_mask_allow(net, type)) + if (!apply_ratelimit) return true; /* @@ -228,6 +230,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, if (!res) __ICMP6_INC_STATS(net, ip6_dst_idev(dst), ICMP6_MIB_RATELIMITHOST); + else + icmp_global_consume(net); dst_release(dst); return res; } @@ -452,6 +456,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, struct net *net; struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; + bool apply_ratelimit = false; struct dst_entry *dst; struct icmp6hdr tmp_hdr; struct flowi6 fl6; @@ -533,11 +538,12 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, return; } - /* Needed by both icmp_global_allow and icmpv6_xmit_lock */ + /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */ local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit */ - if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type)) + if (!(skb->dev->flags & IFF_LOOPBACK) && + !icmpv6_global_allow(net, type, &apply_ratelimit)) goto out_bh_enable; mip6_addr_swap(skb, parm); @@ -575,7 +581,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, np = inet6_sk(sk); - if (!icmpv6_xrlim_allow(sk, type, &fl6)) + if (!icmpv6_xrlim_allow(sk, 
type, &fl6, apply_ratelimit)) goto out; tmp_hdr.icmp6_type = type; @@ -717,6 +723,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; struct icmp6hdr *icmph = icmp6_hdr(skb); + bool apply_ratelimit = false; struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; @@ -781,8 +788,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) goto out; /* Check the ratelimit */ - if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) || - !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6)) + if ((!(skb->dev->flags & IFF_LOOPBACK) && + !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) || + !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit)) goto out_dst_release; idev = __in6_dev_get(skb->dev); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 83b195f09561d9..3b2eed7fc7659e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -51,6 +51,7 @@ #include #include #include +#include /* This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c @@ -935,8 +936,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark, - RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6, - 0, dst, tiph->saddr, 0, 0, + tos & INET_DSCP_MASK, RT_SCOPE_UNIVERSE, + IPPROTO_IPV6, 0, dst, tiph->saddr, 0, 0, sock_net_uid(tunnel->net, NULL)); rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index c92bdc4dfe1960..729ef582a3a8b8 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2491,7 +2491,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) - return (void *)acts; + return ERR_CAST(acts); memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); acts->actions_len = (*sfa)->actions_len; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c56c61b0c12ef2..b22767c0c078c6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -45,6 +45,7 @@ #ifdef CONFIG_XFRM_ESPINTCP #include #endif +#include #include "xfrm_hash.h" @@ -2561,7 +2562,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, static int xfrm_get_tos(const struct flowi *fl, int family) { if (family == AF_INET) - return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; + return fl->u.ip4.flowi4_tos & INET_DSCP_MASK; return 0; } diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 274bdc1b0a824a..58ed400198bf2e 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -43,6 +43,7 @@ pub mod net; pub mod page; pub mod prelude; pub mod print; +pub mod sizes; mod static_assert; #[doc(hidden)] pub mod std_vendor; diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index fd40b703d2244f..1d47884aa3cf18 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -7,8 +7,9 @@ //! C headers: [`include/linux/phy.h`](srctree/include/linux/phy.h). use crate::{error::*, prelude::*, types::Opaque}; +use core::{marker::PhantomData, ptr::addr_of_mut}; -use core::marker::PhantomData; +pub mod reg; /// PHY state machine states. /// @@ -58,8 +59,9 @@ pub enum DuplexMode { /// /// # Invariants /// -/// Referencing a `phy_device` using this struct asserts that you are in -/// a context where all methods defined on this struct are safe to call. 
+/// - Referencing a `phy_device` using this struct asserts that you are in +/// a context where all methods defined on this struct are safe to call. +/// - This struct always has a valid `self.0.mdio.dev`. /// /// [`struct phy_device`]: srctree/include/linux/phy.h // During the calls to most functions in [`Driver`], the C side (`PHYLIB`) holds a lock that is @@ -76,9 +78,11 @@ impl Device { /// /// # Safety /// - /// For the duration of 'a, the pointer must point at a valid `phy_device`, - /// and the caller must be in a context where all methods defined on this struct - /// are safe to call. + /// For the duration of `'a`, + /// - the pointer must point at a valid `phy_device`, and the caller + /// must be in a context where all methods defined on this struct + /// are safe to call. + /// - `(*ptr).mdio.dev` must be a valid. unsafe fn from_raw<'a>(ptr: *mut bindings::phy_device) -> &'a mut Self { // CAST: `Self` is a `repr(transparent)` wrapper around `bindings::phy_device`. let ptr = ptr.cast::(); @@ -175,32 +179,15 @@ impl Device { unsafe { (*phydev).duplex = v }; } - /// Reads a given C22 PHY register. + /// Reads a PHY register. // This function reads a hardware register and updates the stats so takes `&mut self`. - pub fn read(&mut self, regnum: u16) -> Result { - let phydev = self.0.get(); - // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Self`. - // So it's just an FFI call, open code of `phy_read()` with a valid `phy_device` pointer - // `phydev`. - let ret = unsafe { - bindings::mdiobus_read((*phydev).mdio.bus, (*phydev).mdio.addr, regnum.into()) - }; - if ret < 0 { - Err(Error::from_errno(ret)) - } else { - Ok(ret as u16) - } + pub fn read(&mut self, reg: R) -> Result { + reg.read(self) } - /// Writes a given C22 PHY register. - pub fn write(&mut self, regnum: u16, val: u16) -> Result { - let phydev = self.0.get(); - // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Self`. - // So it's just an FFI call, open code of `phy_write()` with a valid `phy_device` pointer - // `phydev`. - to_result(unsafe { - bindings::mdiobus_write((*phydev).mdio.bus, (*phydev).mdio.addr, regnum.into(), val) - }) + /// Writes a PHY register. + pub fn write(&mut self, reg: R, val: u16) -> Result { + reg.write(self, val) } /// Reads a paged register. @@ -265,16 +252,8 @@ impl Device { } /// Checks the link status and updates current link state. - pub fn genphy_read_status(&mut self) -> Result { - let phydev = self.0.get(); - // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Self`. - // So it's just an FFI call. - let ret = unsafe { bindings::genphy_read_status(phydev) }; - if ret < 0 { - Err(Error::from_errno(ret)) - } else { - Ok(ret as u16) - } + pub fn genphy_read_status(&mut self) -> Result { + R::read_status(self) } /// Updates the link status. @@ -302,6 +281,14 @@ impl Device { } } +impl AsRef for Device { + fn as_ref(&self) -> &kernel::device::Device { + let phydev = self.0.get(); + // SAFETY: The struct invariant ensures that `mdio.dev` is valid. + unsafe { kernel::device::Device::as_ref(addr_of_mut!((*phydev).mdio.dev)) } + } +} + /// Defines certain other features this PHY supports (like interrupts). /// /// These flag values are used in [`Driver::FLAGS`]. @@ -338,6 +325,21 @@ impl Adapter { }) } + /// # Safety + /// + /// `phydev` must be passed by the corresponding callback in `phy_driver`. 
+ unsafe extern "C" fn probe_callback(phydev: *mut bindings::phy_device) -> core::ffi::c_int { + from_result(|| { + // SAFETY: This callback is called only in contexts + // where we can exclusively access `phy_device` because + // it's not published yet, so the accessors on `Device` are okay + // to call. + let dev = unsafe { Device::from_raw(phydev) }; + T::probe(dev)?; + Ok(0) + }) + } + /// # Safety /// /// `phydev` must be passed by the corresponding callback in `phy_driver`. @@ -511,6 +513,11 @@ pub const fn create_phy_driver() -> DriverVTable { } else { None }, + probe: if T::HAS_PROBE { + Some(Adapter::::probe_callback) + } else { + None + }, get_features: if T::HAS_GET_FEATURES { Some(Adapter::::get_features_callback) } else { @@ -583,6 +590,11 @@ pub trait Driver { kernel::build_error(VTABLE_DEFAULT_ERROR) } + /// Sets up device-specific structures during discovery. + fn probe(_dev: &mut Device) -> Result { + kernel::build_error(VTABLE_DEFAULT_ERROR) + } + /// Probes the hardware to determine what abilities it has. fn get_features(_dev: &mut Device) -> Result { kernel::build_error(VTABLE_DEFAULT_ERROR) diff --git a/rust/kernel/net/phy/reg.rs b/rust/kernel/net/phy/reg.rs new file mode 100644 index 00000000000000..a7db0064cb7d60 --- /dev/null +++ b/rust/kernel/net/phy/reg.rs @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 FUJITA Tomonori + +//! PHY register interfaces. +//! +//! This module provides support for accessing PHY registers in the +//! Ethernet management interface clauses 22 and 45 register namespaces, as +//! defined in IEEE 802.3. + +use super::Device; +use crate::build_assert; +use crate::error::*; +use crate::uapi; + +mod private { + /// Marker that a trait cannot be implemented outside of this crate + pub trait Sealed {} +} + +/// Accesses PHY registers. +/// +/// This trait is used to implement the unified interface to access +/// C22 and C45 PHY registers. +/// +/// # Examples +/// +/// ```ignore +/// fn link_change_notify(dev: &mut Device) { +/// // read C22 BMCR register +/// dev.read(C22::BMCR); +/// // read C45 PMA/PMD control 1 register +/// dev.read(C45::new(Mmd::PMAPMD, 0)); +/// +/// // Checks the link status as reported by registers in the C22 namespace +/// // and updates current link state. +/// dev.genphy_read_status::(); +/// // Checks the link status as reported by registers in the C45 namespace +/// // and updates current link state. +/// dev.genphy_read_status::(); +/// } +/// ``` +pub trait Register: private::Sealed { + /// Reads a PHY register. + fn read(&self, dev: &mut Device) -> Result; + + /// Writes a PHY register. + fn write(&self, dev: &mut Device, val: u16) -> Result; + + /// Checks the link status and updates current link state. + fn read_status(dev: &mut Device) -> Result; +} + +/// A single MDIO clause 22 register address (5 bits). +#[derive(Copy, Clone, Debug)] +pub struct C22(u8); + +impl C22 { + /// Basic mode control. + pub const BMCR: Self = C22(0x00); + /// Basic mode status. + pub const BMSR: Self = C22(0x01); + /// PHY identifier 1. + pub const PHYSID1: Self = C22(0x02); + /// PHY identifier 2. + pub const PHYSID2: Self = C22(0x03); + /// Auto-negotiation advertisement. + pub const ADVERTISE: Self = C22(0x04); + /// Auto-negotiation link partner base page ability. + pub const LPA: Self = C22(0x05); + /// Auto-negotiation expansion. + pub const EXPANSION: Self = C22(0x06); + /// Auto-negotiation next page transmit. 
+ pub const NEXT_PAGE_TRANSMIT: Self = C22(0x07); + /// Auto-negotiation link partner received next page. + pub const LP_RECEIVED_NEXT_PAGE: Self = C22(0x08); + /// Master-slave control. + pub const MASTER_SLAVE_CONTROL: Self = C22(0x09); + /// Master-slave status. + pub const MASTER_SLAVE_STATUS: Self = C22(0x0a); + /// PSE Control. + pub const PSE_CONTROL: Self = C22(0x0b); + /// PSE Status. + pub const PSE_STATUS: Self = C22(0x0c); + /// MMD Register control. + pub const MMD_CONTROL: Self = C22(0x0d); + /// MMD Register address data. + pub const MMD_DATA: Self = C22(0x0e); + /// Extended status. + pub const EXTENDED_STATUS: Self = C22(0x0f); + + /// Creates a new instance of `C22` with a vendor specific register. + pub const fn vendor_specific<const N: u8>() -> Self { + build_assert!( + N > 0x0f && N < 0x20, + "Vendor-specific register address must be between 16 and 31" + ); + C22(N) + } +} + +impl private::Sealed for C22 {} + +impl Register for C22 { + fn read(&self, dev: &mut Device) -> Result<u16> { + let phydev = dev.0.get(); + // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Device`. + // So it's just an FFI call, open code of `phy_read()` with a valid `phy_device` pointer + // `phydev`. + let ret = unsafe { + bindings::mdiobus_read((*phydev).mdio.bus, (*phydev).mdio.addr, self.0.into()) + }; + to_result(ret)?; + Ok(ret as u16) + } + + fn write(&self, dev: &mut Device, val: u16) -> Result { + let phydev = dev.0.get(); + // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Device`. + // So it's just an FFI call, open code of `phy_write()` with a valid `phy_device` pointer + // `phydev`. + to_result(unsafe { + bindings::mdiobus_write((*phydev).mdio.bus, (*phydev).mdio.addr, self.0.into(), val) + }) + } + + fn read_status(dev: &mut Device) -> Result<u16> { + let phydev = dev.0.get(); + // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Self`. + // So it's just an FFI call. + let ret = unsafe { bindings::genphy_read_status(phydev) }; + to_result(ret)?; + Ok(ret as u16) + } +} + +/// A single MDIO clause 45 register device and address. +#[derive(Copy, Clone, Debug)] +pub struct Mmd(u8); + +impl Mmd { + /// Physical Medium Attachment/Dependent. + pub const PMAPMD: Self = Mmd(uapi::MDIO_MMD_PMAPMD as u8); + /// WAN interface sublayer. + pub const WIS: Self = Mmd(uapi::MDIO_MMD_WIS as u8); + /// Physical coding sublayer. + pub const PCS: Self = Mmd(uapi::MDIO_MMD_PCS as u8); + /// PHY Extender sublayer. + pub const PHYXS: Self = Mmd(uapi::MDIO_MMD_PHYXS as u8); + /// DTE Extender sublayer. + pub const DTEXS: Self = Mmd(uapi::MDIO_MMD_DTEXS as u8); + /// Transmission convergence. + pub const TC: Self = Mmd(uapi::MDIO_MMD_TC as u8); + /// Auto negotiation. + pub const AN: Self = Mmd(uapi::MDIO_MMD_AN as u8); + /// Separated PMA (1). + pub const SEPARATED_PMA1: Self = Mmd(8); + /// Separated PMA (2). + pub const SEPARATED_PMA2: Self = Mmd(9); + /// Separated PMA (3). + pub const SEPARATED_PMA3: Self = Mmd(10); + /// Separated PMA (4). + pub const SEPARATED_PMA4: Self = Mmd(11); + /// OFDM PMA/PMD. + pub const OFDM_PMAPMD: Self = Mmd(12); + /// Power unit. + pub const POWER_UNIT: Self = Mmd(13); + /// Clause 22 extension. + pub const C22_EXT: Self = Mmd(uapi::MDIO_MMD_C22EXT as u8); + /// Vendor specific 1. + pub const VEND1: Self = Mmd(uapi::MDIO_MMD_VEND1 as u8); + /// Vendor specific 2. + pub const VEND2: Self = Mmd(uapi::MDIO_MMD_VEND2 as u8); +} + +/// A single MDIO clause 45 register device and address.
+/// +/// Clause 45 uses a 5-bit device address to access a specific MMD within +/// a port, then a 16-bit register address to access a location within +/// that device. `C45` represents this by storing a [`Mmd`] and +/// a register number. +pub struct C45 { + devad: Mmd, + regnum: u16, +} + +impl C45 { + /// Creates a new instance of `C45`. + pub fn new(devad: Mmd, regnum: u16) -> Self { + Self { devad, regnum } + } +} + +impl private::Sealed for C45 {} + +impl Register for C45 { + fn read(&self, dev: &mut Device) -> Result { + let phydev = dev.0.get(); + // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Device`. + // So it's just an FFI call. + let ret = + unsafe { bindings::phy_read_mmd(phydev, self.devad.0.into(), self.regnum.into()) }; + to_result(ret)?; + Ok(ret as u16) + } + + fn write(&self, dev: &mut Device, val: u16) -> Result { + let phydev = dev.0.get(); + // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Device`. + // So it's just an FFI call. + to_result(unsafe { + bindings::phy_write_mmd(phydev, self.devad.0.into(), self.regnum.into(), val) + }) + } + + fn read_status(dev: &mut Device) -> Result { + let phydev = dev.0.get(); + // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Self`. + // So it's just an FFI call. + let ret = unsafe { bindings::genphy_c45_read_status(phydev) }; + to_result(ret)?; + Ok(ret as u16) + } +} diff --git a/rust/kernel/sizes.rs b/rust/kernel/sizes.rs new file mode 100644 index 00000000000000..834c343e4170f5 --- /dev/null +++ b/rust/kernel/sizes.rs @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Commonly used sizes. +//! +//! C headers: [`include/linux/sizes.h`](srctree/include/linux/sizes.h). + +/// 0x00000400 +pub const SZ_1K: usize = bindings::SZ_1K as usize; +/// 0x00000800 +pub const SZ_2K: usize = bindings::SZ_2K as usize; +/// 0x00001000 +pub const SZ_4K: usize = bindings::SZ_4K as usize; +/// 0x00002000 +pub const SZ_8K: usize = bindings::SZ_8K as usize; +/// 0x00004000 +pub const SZ_16K: usize = bindings::SZ_16K as usize; +/// 0x00008000 +pub const SZ_32K: usize = bindings::SZ_32K as usize; +/// 0x00010000 +pub const SZ_64K: usize = bindings::SZ_64K as usize; +/// 0x00020000 +pub const SZ_128K: usize = bindings::SZ_128K as usize; +/// 0x00040000 +pub const SZ_256K: usize = bindings::SZ_256K as usize; +/// 0x00080000 +pub const SZ_512K: usize = bindings::SZ_512K as usize; diff --git a/rust/uapi/uapi_helper.h b/rust/uapi/uapi_helper.h index 08f5e9334c9e83..76d3f103e76499 100644 --- a/rust/uapi/uapi_helper.h +++ b/rust/uapi/uapi_helper.h @@ -7,5 +7,6 @@ */ #include +#include #include #include
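Taken together with the `Device::read`, `Device::write` and `genphy_read_status` changes in phy.rs, the `Register` trait above lets a driver stay generic over the clause 22 and clause 45 namespaces. A minimal sketch follows, assuming a hypothetical `set_reg` helper and an invented VEND1 register address; it is not code from this series.

    use kernel::net::phy::{
        self,
        reg::{Register, C22, C45, Mmd},
    };
    use kernel::prelude::*;

    // Hypothetical helper: writes a value through whichever register namespace
    // the caller picked. Both `C22` and `C45` satisfy the sealed `Register`
    // bound, so one body covers both cases.
    fn set_reg<R: Register>(dev: &mut phy::Device, reg: R, val: u16) -> Result {
        dev.write(reg, val)
    }

    fn example(dev: &mut phy::Device) -> Result<()> {
        // Clause 22 soft-reset value, as ax88796b_rust.rs writes it.
        set_reg(dev, C22::BMCR, 0)?;
        // Clause 45 access; this VEND1 register address is an assumption.
        set_reg(dev, C45::new(Mmd::VEND1, 0x0100), 0x0001)?;

        // The generic genphy helper is selected per namespace the same way.
        dev.genphy_read_status::<C45>()?;
        Ok(())
    }

Keeping `Register` sealed means only the kernel crate can add register namespaces, while drivers such as qt2025.rs remain generic over the existing two.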