rockchip: add kernel 6.1 support
This commit is contained in:
parent
dca450d9a6
commit
f247ce474f
@ -1,2 +0,0 @@
|
||||
LINUX_VERSION-6.0 = .10
|
||||
LINUX_KERNEL_HASH-6.0.10 = 39e57fcd84cd70bfa3e1a4185d3aa0ed7f1432f24c6548d16326b0c3c9541dd0
|
2
include/kernel-6.1
Normal file
2
include/kernel-6.1
Normal file
@ -0,0 +1,2 @@
|
||||
LINUX_VERSION-6.1 =
|
||||
LINUX_KERNEL_HASH-6.1 = 2ca1f17051a430f6fed1196e4952717507171acfd97d96577212502703b25deb
|
@ -221,11 +221,6 @@ $(eval $(call nf_add,NF_NATHELPER_EXTRA,CONFIG_NF_CONNTRACK_IRC, $(P_XT)nf_connt
|
||||
$(eval $(call nf_add,NF_NATHELPER_EXTRA,CONFIG_NF_NAT_IRC, $(P_XT)nf_nat_irc))
|
||||
|
||||
|
||||
# ulog
|
||||
|
||||
$(eval $(call nf_add,IPT_ULOG,CONFIG_IP_NF_TARGET_ULOG, $(P_V4)ipt_ULOG))
|
||||
|
||||
|
||||
# nflog
|
||||
|
||||
$(eval $(call nf_add,IPT_NFLOG,CONFIG_NETFILTER_XT_TARGET_NFLOG, $(P_XT)xt_NFLOG))
|
||||
@ -311,7 +306,6 @@ $(eval $(call nf_add,EBTABLES_IP4,CONFIG_BRIDGE_EBT_SNAT, $(P_EBT)ebt_snat))
|
||||
|
||||
# watchers
|
||||
$(eval $(call nf_add,EBTABLES_WATCHERS,CONFIG_BRIDGE_EBT_LOG, $(P_EBT)ebt_log))
|
||||
$(eval $(call nf_add,EBTABLES_WATCHERS,CONFIG_BRIDGE_EBT_ULOG, $(P_EBT)ebt_ulog))
|
||||
$(eval $(call nf_add,EBTABLES_WATCHERS,CONFIG_BRIDGE_EBT_NFLOG, $(P_EBT)ebt_nflog))
|
||||
$(eval $(call nf_add,EBTABLES_WATCHERS,CONFIG_BRIDGE_EBT_NFQUEUE, $(P_EBT)ebt_nfqueue))
|
||||
|
||||
@ -374,7 +368,6 @@ IPT_BUILTIN += $(IPT_NAT6-y)
|
||||
IPT_BUILTIN += $(IPT_NAT_EXTRA-y)
|
||||
IPT_BUILTIN += $(NF_NATHELPER-y)
|
||||
IPT_BUILTIN += $(NF_NATHELPER_EXTRA-y)
|
||||
IPT_BUILTIN += $(IPT_ULOG-y)
|
||||
IPT_BUILTIN += $(IPT_TPROXY-y)
|
||||
IPT_BUILTIN += $(NFNETLINK-y)
|
||||
IPT_BUILTIN += $(NFNETLINK_LOG-y)
|
||||
|
@ -447,32 +447,6 @@ endef
|
||||
|
||||
$(eval $(call KernelPackage,crypto-kpp))
|
||||
|
||||
define KernelPackage/crypto-lib-blake2s
|
||||
TITLE:=BLAKE2s hash function library
|
||||
KCONFIG:=CONFIG_CRYPTO_LIB_BLAKE2S
|
||||
HIDDEN:=1
|
||||
FILES:= \
|
||||
$(LINUX_DIR)/lib/crypto/libblake2s.ko \
|
||||
$(LINUX_DIR)/lib/crypto/libblake2s-generic.ko
|
||||
$(call AddDepends/crypto,+PACKAGE_kmod-crypto-hash:kmod-crypto-hash)
|
||||
endef
|
||||
|
||||
define KernelPackage/crypto-lib-blake2s/config
|
||||
imply PACKAGE_kmod-crypto-hash
|
||||
endef
|
||||
|
||||
define KernelPackage/crypto-lib-blake2s/x86/64
|
||||
KCONFIG+=CONFIG_CRYPTO_BLAKE2S_X86
|
||||
FILES+=$(LINUX_DIR)/arch/x86/crypto/blake2s-x86_64.ko
|
||||
endef
|
||||
|
||||
define KernelPackage/crypto-lib-blake2s/arm
|
||||
KCONFIG+=CONFIG_CRYPTO_BLAKE2S_ARM
|
||||
FILES+=$(LINUX_DIR)/arch/arm/crypto/blake2s-arm.ko
|
||||
endef
|
||||
|
||||
$(eval $(call KernelPackage,crypto-lib-blake2s))
|
||||
|
||||
|
||||
define KernelPackage/crypto-lib-chacha20
|
||||
TITLE:=ChaCha library interface
|
||||
|
@ -109,9 +109,9 @@ define KernelPackage/fs-cifs
|
||||
+kmod-crypto-ccm \
|
||||
+kmod-crypto-ecb \
|
||||
+kmod-crypto-des \
|
||||
+(LINUX_5_15||LINUX_6_0):kmod-asn1-decoder \
|
||||
+(LINUX_5_15||LINUX_6_0):kmod-oid-registry \
|
||||
+(LINUX_5_15||LINUX_6_0):kmod-dnsresolver
|
||||
+(LINUX_5_15||LINUX_6_1):kmod-asn1-decoder \
|
||||
+(LINUX_5_15||LINUX_6_1):kmod-oid-registry \
|
||||
+(LINUX_5_15||LINUX_6_1):kmod-dnsresolver
|
||||
endef
|
||||
|
||||
define KernelPackage/fs-cifs/description
|
||||
@ -530,7 +530,7 @@ $(eval $(call KernelPackage,fs-ntfs))
|
||||
define KernelPackage/fs-ntfs3
|
||||
SUBMENU:=$(FS_MENU)
|
||||
TITLE:=NTFS3 Read-Write file system support
|
||||
DEPENDS:=@(LINUX_5_4||LINUX_5_10||LINUX_5_15||LINUX_6_0) +kmod-nls-base
|
||||
DEPENDS:= +kmod-nls-base
|
||||
KCONFIG:= \
|
||||
CONFIG_NTFS3_FS \
|
||||
CONFIG_NTFS3_64BIT_CLUSTER=y \
|
||||
|
@ -134,6 +134,7 @@ define KernelPackage/lib-zstd
|
||||
$(LINUX_DIR)/crypto/zstd.ko \
|
||||
$(LINUX_DIR)/lib/xxhash.ko \
|
||||
$(LINUX_DIR)/lib/zstd/zstd_compress.ko \
|
||||
$(LINUX_DIR)/lib/zstd/zstd_common.ko@ge6.1 \
|
||||
$(LINUX_DIR)/lib/zstd/zstd_decompress.ko
|
||||
AUTOLOAD:=$(call AutoProbe,xxhash zstd zstd_compress zstd_decompress)
|
||||
endef
|
||||
@ -151,13 +152,15 @@ define KernelPackage/lib-lz4
|
||||
DEPENDS:=+kmod-crypto-acompress
|
||||
KCONFIG:= \
|
||||
CONFIG_CRYPTO_LZ4 \
|
||||
CONFIG_CRYPTO_LZ4HC \
|
||||
CONFIG_LZ4_COMPRESS \
|
||||
CONFIG_LZ4_DECOMPRESS
|
||||
FILES:= \
|
||||
$(LINUX_DIR)/crypto/lz4.ko \
|
||||
$(LINUX_DIR)/lib/lz4/lz4_compress.ko \
|
||||
$(LINUX_DIR)/lib/lz4/lz4hc_compress.ko \
|
||||
$(LINUX_DIR)/lib/lz4/lz4_decompress.ko
|
||||
AUTOLOAD:=$(call AutoProbe,lz4 lz4_compress lz4_decompress)
|
||||
AUTOLOAD:=$(call AutoProbe,lz4 lz4_compress lz4hc_compress lz4_decompress)
|
||||
endef
|
||||
|
||||
define KernelPackage/lib-lz4/description
|
||||
|
@ -142,7 +142,7 @@ $(eval $(call KernelPackage,mii))
|
||||
define KernelPackage/mdio-devres
|
||||
SUBMENU:=$(NETWORK_DEVICES_MENU)
|
||||
TITLE:=Supports MDIO device registration
|
||||
DEPENDS:=@(LINUX_5_10||LINUX_5_15||LINUX_6_0) +kmod-libphy +(TARGET_armvirt||TARGET_bcm27xx_bcm2708||TARGET_tegra):kmod-of-mdio
|
||||
DEPENDS:=@(LINUX_5_10||LINUX_5_15||LINUX_6_1) +kmod-libphy +(TARGET_armvirt||TARGET_bcm27xx_bcm2708||TARGET_tegra):kmod-of-mdio
|
||||
KCONFIG:=CONFIG_MDIO_DEVRES
|
||||
HIDDEN:=1
|
||||
FILES:=$(LINUX_DIR)/drivers/net/phy/mdio_devres.ko
|
||||
@ -597,7 +597,7 @@ $(eval $(call KernelPackage,8139cp))
|
||||
define KernelPackage/r8169
|
||||
SUBMENU:=$(NETWORK_DEVICES_MENU)
|
||||
TITLE:=RealTek RTL-8169 PCI Gigabit Ethernet Adapter kernel support
|
||||
DEPENDS:=@PCI_SUPPORT +kmod-mii +r8169-firmware +kmod-phy-realtek +(LINUX_5_10||LINUX_5_15||LINUX_6_0):kmod-mdio-devres
|
||||
DEPENDS:=@PCI_SUPPORT +kmod-mii +r8169-firmware +kmod-phy-realtek +(LINUX_5_10||LINUX_5_15||LINUX_6_1):kmod-mdio-devres
|
||||
KCONFIG:= \
|
||||
CONFIG_R8169 \
|
||||
CONFIG_R8169_NAPI=y \
|
||||
@ -723,7 +723,7 @@ $(eval $(call KernelPackage,igbvf))
|
||||
define KernelPackage/ixgbe
|
||||
SUBMENU:=$(NETWORK_DEVICES_MENU)
|
||||
TITLE:=Intel(R) 82598/82599 PCI-Express 10 Gigabit Ethernet support
|
||||
DEPENDS:=@PCI_SUPPORT +kmod-mdio +kmod-ptp +kmod-hwmon-core +kmod-libphy +(LINUX_5_10||LINUX_5_15||LINUX_6_0):kmod-mdio-devres
|
||||
DEPENDS:=@PCI_SUPPORT +kmod-mdio +kmod-ptp +kmod-hwmon-core +kmod-libphy +(LINUX_5_10||LINUX_5_15||LINUX_6_1):kmod-mdio-devres
|
||||
KCONFIG:=CONFIG_IXGBE \
|
||||
CONFIG_IXGBE_VXLAN=n \
|
||||
CONFIG_IXGBE_HWMON=y \
|
||||
|
@ -604,23 +604,6 @@ endef
|
||||
$(eval $(call KernelPackage,nf-nathelper-extra))
|
||||
|
||||
|
||||
define KernelPackage/ipt-ulog
|
||||
TITLE:=Module for user-space packet logging
|
||||
KCONFIG:=$(KCONFIG_IPT_ULOG)
|
||||
FILES:=$(foreach mod,$(IPT_ULOG-m),$(LINUX_DIR)/net/$(mod).ko)
|
||||
AUTOLOAD:=$(call AutoProbe,$(notdir $(IPT_ULOG-m)))
|
||||
$(call AddDepends/ipt)
|
||||
endef
|
||||
|
||||
define KernelPackage/ipt-ulog/description
|
||||
Netfilter (IPv4) module for user-space packet logging
|
||||
Includes:
|
||||
- ULOG
|
||||
endef
|
||||
|
||||
$(eval $(call KernelPackage,ipt-ulog))
|
||||
|
||||
|
||||
define KernelPackage/ipt-nflog
|
||||
TITLE:=Module for user-space packet logging
|
||||
KCONFIG:=$(KCONFIG_IPT_NFLOG)
|
||||
|
@ -565,6 +565,23 @@ endef
|
||||
$(eval $(call KernelPackage,veth))
|
||||
|
||||
|
||||
define KernelPackage/vrf
|
||||
SUBMENU:=$(NETWORK_SUPPORT_MENU)
|
||||
TITLE:=Virtual Routing and Forwarding (Lite)
|
||||
DEPENDS:=@KERNEL_NET_L3_MASTER_DEV
|
||||
KCONFIG:=CONFIG_NET_VRF
|
||||
FILES:=$(LINUX_DIR)/drivers/net/vrf.ko
|
||||
AUTOLOAD:=$(call AutoLoad,30,vrf)
|
||||
endef
|
||||
|
||||
define KernelPackage/vrf/description
|
||||
This option enables the support for mapping interfaces into VRF's. The
|
||||
support enables VRF devices.
|
||||
endef
|
||||
|
||||
$(eval $(call KernelPackage,vrf))
|
||||
|
||||
|
||||
define KernelPackage/slhc
|
||||
SUBMENU:=$(NETWORK_SUPPORT_MENU)
|
||||
HIDDEN:=1
|
||||
@ -1296,7 +1313,6 @@ define KernelPackage/wireguard
|
||||
SUBMENU:=$(NETWORK_SUPPORT_MENU)
|
||||
TITLE:=WireGuard secure network tunnel
|
||||
DEPENDS:= \
|
||||
+kmod-crypto-lib-blake2s \
|
||||
+kmod-crypto-lib-chacha20poly1305 \
|
||||
+kmod-crypto-lib-curve25519 \
|
||||
+kmod-udptunnel4 \
|
||||
@ -1340,11 +1356,11 @@ define KernelPackage/qrtr
|
||||
SUBMENU:=$(NETWORK_SUPPORT_MENU)
|
||||
TITLE:=Qualcomm IPC Router support
|
||||
HIDDEN:=1
|
||||
DEPENDS:=@(LINUX_5_15||LINUX_6_0)
|
||||
DEPENDS:=@(LINUX_5_15||LINUX_6_1)
|
||||
KCONFIG:=CONFIG_QRTR
|
||||
FILES:= \
|
||||
$(LINUX_DIR)/net/qrtr/qrtr.ko \
|
||||
$(LINUX_DIR)/net/qrtr/ns.ko
|
||||
$(LINUX_DIR)/net/qrtr/ns.ko@lt6.0
|
||||
AUTOLOAD:=$(call AutoProbe,qrtr)
|
||||
endef
|
||||
|
||||
|
@ -916,6 +916,10 @@ define KernelPackage/zram/config
|
||||
bool "lz4"
|
||||
select PACKAGE_kmod-lib-lz4
|
||||
|
||||
config ZRAM_DEF_COMP_LZ4HC
|
||||
bool "lz4-hc"
|
||||
select PACKAGE_kmod-lib-lz4hc
|
||||
|
||||
config ZRAM_DEF_COMP_ZSTD
|
||||
bool "zstd"
|
||||
select PACKAGE_kmod-lib-zstd
|
||||
@ -1136,8 +1140,8 @@ $(eval $(call KernelPackage,keys-trusted))
|
||||
define KernelPackage/tpm
|
||||
SUBMENU:=$(OTHER_MENU)
|
||||
TITLE:=TPM Hardware Support
|
||||
DEPENDS:= +kmod-random-core +(LINUX_5_15||LINUX_6_0):kmod-asn1-decoder \
|
||||
+(LINUX_5_15||LINUX_6_0):kmod-asn1-encoder +(LINUX_5_15||LINUX_6_0):kmod-oid-registry
|
||||
DEPENDS:= +kmod-random-core +(LINUX_5_15||LINUX_6_1):kmod-asn1-decoder \
|
||||
+(LINUX_5_15||LINUX_6_1):kmod-asn1-encoder +(LINUX_5_15||LINUX_6_1):kmod-oid-registry
|
||||
KCONFIG:= CONFIG_TCG_TPM
|
||||
FILES:= $(LINUX_DIR)/drivers/char/tpm/tpm.ko
|
||||
AUTOLOAD:=$(call AutoLoad,10,tpm,1)
|
||||
@ -1283,7 +1287,7 @@ $(eval $(call KernelPackage,qcom-qmi-helpers))
|
||||
define KernelPackage/mhi
|
||||
SUBMENU:=$(OTHER_MENU)
|
||||
TITLE:=Modem Host Interface (MHI) bus
|
||||
DEPENDS:=@(LINUX_5_15||LINUX_6_0)
|
||||
DEPENDS:=@(LINUX_5_15||LINUX_6_1)
|
||||
KCONFIG:=CONFIG_MHI_BUS \
|
||||
CONFIG_MHI_BUS_DEBUG=y \
|
||||
CONFIG_MHI_BUS_PCI_GENERIC=n \
|
||||
|
@ -1138,7 +1138,8 @@ $(eval $(call KernelPackage,usb-net-aqc111))
|
||||
|
||||
define KernelPackage/usb-net-asix
|
||||
TITLE:=Kernel module for USB-to-Ethernet Asix convertors
|
||||
DEPENDS:=+kmod-libphy +(LINUX_5_15||LINUX_6_0):kmod-mdio-devres
|
||||
DEPENDS:=+(LINUX_5_4||LINUX_5_10):kmod-libphy \
|
||||
+(LINUX_5_15||LINUX_6_1):kmod-mdio-devres +LINUX_6_1:kmod-phylink
|
||||
KCONFIG:=CONFIG_USB_NET_AX8817X
|
||||
FILES:= \
|
||||
$(LINUX_DIR)/drivers/$(USBNET_DIR)/asix.ko \
|
||||
@ -1153,7 +1154,6 @@ endef
|
||||
|
||||
$(eval $(call KernelPackage,usb-net-asix))
|
||||
|
||||
|
||||
define KernelPackage/usb-net-asix-ax88179
|
||||
TITLE:=Kernel module for USB-to-Gigabit-Ethernet Asix convertors
|
||||
DEPENDS:=+kmod-libphy
|
||||
|
@ -28,7 +28,9 @@ define KernelPackage/backlight
|
||||
CONFIG_BACKLIGHT_ADP8870=n \
|
||||
CONFIG_BACKLIGHT_OT200=n \
|
||||
CONFIG_BACKLIGHT_PM8941_WLED=n
|
||||
FILES:=$(LINUX_DIR)/drivers/video/backlight/backlight.ko
|
||||
FILES:=$(LINUX_DIR)/drivers/video/backlight/backlight.ko \
|
||||
$(LINUX_DIR)/drivers/acpi/video.ko@ge6.1 \
|
||||
$(LINUX_DIR)/drivers/platform/x86/wmi.ko@ge6.1
|
||||
AUTOLOAD:=$(call AutoProbe,video backlight)
|
||||
endef
|
||||
|
||||
@ -243,8 +245,8 @@ define KernelPackage/drm
|
||||
SUBMENU:=$(VIDEO_MENU)
|
||||
TITLE:=Direct Rendering Manager (DRM) support
|
||||
HIDDEN:=1
|
||||
DEPENDS:=+kmod-dma-buf +kmod-i2c-core +kmod-i2c-algo-bit +PACKAGE_kmod-backlight:kmod-backlight \
|
||||
+(LINUX_5_15||LINUX_6_0):kmod-fb
|
||||
DEPENDS:=+kmod-dma-buf +kmod-i2c-core +kmod-i2c-algo-bit +kmod-backlight \
|
||||
+(LINUX_5_15||LINUX_6_1):kmod-fb
|
||||
KCONFIG:= \
|
||||
CONFIG_DRM \
|
||||
CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y \
|
||||
@ -266,7 +268,7 @@ $(eval $(call KernelPackage,drm))
|
||||
define KernelPackage/drm-buddy
|
||||
SUBMENU:=$(VIDEO_MENU)
|
||||
TITLE:=A page based buddy allocator
|
||||
DEPENDS:=@TARGET_x86 @DISPLAY_SUPPORT +kmod-drm @(LINUX_6_0)
|
||||
DEPENDS:=@TARGET_x86 @DISPLAY_SUPPORT +kmod-drm @LINUX_6_1
|
||||
KCONFIG:=CONFIG_DRM_BUDDY
|
||||
FILES:= $(LINUX_DIR)/drivers/gpu/drm/drm_buddy.ko
|
||||
AUTOLOAD:=$(call AutoProbe,drm_buddy)
|
||||
@ -311,7 +313,7 @@ $(eval $(call KernelPackage,drm-kms-helper))
|
||||
define KernelPackage/drm-display-helper
|
||||
SUBMENU:=$(VIDEO_MENU)
|
||||
TITLE:=DRM helpers for display adapters drivers
|
||||
DEPENDS:=@DISPLAY_SUPPORT +kmod-drm +TARGET_x86:kmod-drm-buddy @(LINUX_6_0)
|
||||
DEPENDS:=@DISPLAY_SUPPORT +kmod-drm +TARGET_x86:kmod-drm-buddy @LINUX_6_1
|
||||
KCONFIG:=CONFIG_DRM_DISPLAY_HELPER
|
||||
FILES:=$(LINUX_DIR)/drivers/gpu/drm/display/drm_display_helper.ko
|
||||
AUTOLOAD:=$(call AutoProbe,drm_display_helper)
|
||||
@ -328,7 +330,7 @@ define KernelPackage/drm-amdgpu
|
||||
TITLE:=AMDGPU DRM support
|
||||
DEPENDS:=@TARGET_x86 @DISPLAY_SUPPORT +kmod-backlight +kmod-drm-ttm \
|
||||
+kmod-drm-kms-helper +kmod-i2c-algo-bit +amdgpu-firmware \
|
||||
+(LINUX_6_0):kmod-drm-display-helper
|
||||
+LINUX_6_1:kmod-drm-display-helper
|
||||
KCONFIG:=CONFIG_DRM_AMDGPU \
|
||||
CONFIG_DRM_AMDGPU_SI=y \
|
||||
CONFIG_DRM_AMDGPU_CIK=y \
|
||||
@ -1103,7 +1105,7 @@ define KernelPackage/drm-i915
|
||||
SUBMENU:=$(VIDEO_MENU)
|
||||
TITLE:=Intel GPU drm support
|
||||
DEPENDS:=@TARGET_x86 +kmod-drm-ttm +kmod-drm-kms-helper +i915-firmware \
|
||||
+(LINUX_6_0):kmod-drm-display-helper
|
||||
+LINUX_6_1:kmod-drm-display-helper
|
||||
KCONFIG:= \
|
||||
CONFIG_INTEL_GTT \
|
||||
CONFIG_DRM_I915 \
|
||||
|
@ -7,12 +7,12 @@ include $(TOPDIR)/rules.mk
|
||||
include $(INCLUDE_DIR)/kernel.mk
|
||||
|
||||
PKG_NAME:=r8152
|
||||
PKG_VERSION:=2.16.1
|
||||
PKG_RELEASE:=1
|
||||
PKG_VERSION:=2.16.3.20220914
|
||||
PKG_RELEASE:=3
|
||||
|
||||
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz
|
||||
PKG_SOURCE_URL:=https://codeload.github.com/wget/realtek-r8152-linux/tar.gz/v$(PKG_VERSION)?
|
||||
PKG_HASH:=2be6a02f6e29485efd107bb7e777ad3c482d9db0ff7e5e6c5ef034a1557a395b
|
||||
PKG_HASH:=61ed7af34c8882c6028ddd1a27bb78fb5bfba41211f84dd7a06e4dc84dbe9a9a
|
||||
|
||||
PKG_BUILD_DIR:=$(KERNEL_BUILD_DIR)/realtek-$(PKG_NAME)-linux-$(PKG_VERSION)
|
||||
|
38
package/kernel/r8152/patches/020-6.1-support.patch
Normal file
38
package/kernel/r8152/patches/020-6.1-support.patch
Normal file
@ -0,0 +1,38 @@
|
||||
--- a/compatibility.h
|
||||
+++ b/compatibility.h
|
||||
@@ -237,9 +237,15 @@
|
||||
#define napi_disable(napi_ptr) netif_poll_disable(container_of(napi_ptr, struct r8152, napi)->netdev)
|
||||
#define napi_schedule(napi_ptr) netif_rx_schedule(container_of(napi_ptr, struct r8152, napi)->netdev)
|
||||
#define napi_complete(napi_ptr) netif_rx_complete(container_of(napi_ptr, struct r8152, napi)->netdev)
|
||||
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 1, 0)
|
||||
+ #define netif_napi_add_weight(ndev, napi_ptr, function, weight_t) \
|
||||
+ ndev->poll = function; \
|
||||
+ ndev->weight = weight_t;
|
||||
+#else
|
||||
#define netif_napi_add(ndev, napi_ptr, function, weight_t) \
|
||||
ndev->poll = function; \
|
||||
ndev->weight = weight_t;
|
||||
+#endif
|
||||
typedef unsigned long uintptr_t;
|
||||
#define DMA_BIT_MASK(value) \
|
||||
(value < 64 ? ((1ULL << value) - 1) : 0xFFFFFFFFFFFFFFFFULL)
|
||||
--- a/r8152.c
|
||||
+++ b/r8152.c
|
||||
@@ -20718,10 +20718,17 @@
|
||||
|
||||
usb_set_intfdata(intf, tp);
|
||||
|
||||
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 1, 0)
|
||||
+ if (tp->support_2500full)
|
||||
+ netif_napi_add_weight(netdev, &tp->napi, r8152_poll, 256);
|
||||
+ else
|
||||
+ netif_napi_add_weight(netdev, &tp->napi, r8152_poll, 64);
|
||||
+#else
|
||||
if (tp->support_2500full)
|
||||
netif_napi_add(netdev, &tp->napi, r8152_poll, 256);
|
||||
else
|
||||
netif_napi_add(netdev, &tp->napi, r8152_poll, 64);
|
||||
+#endif
|
||||
|
||||
ret = register_netdev(netdev);
|
||||
if (ret != 0) {
|
@ -7,13 +7,13 @@ include $(TOPDIR)/rules.mk
|
||||
include $(INCLUDE_DIR)/kernel.mk
|
||||
|
||||
PKG_NAME:=r8168
|
||||
PKG_VERSION:=8.050.03
|
||||
PKG_RELEASE:=$(AUTORELEASE)
|
||||
PKG_VERSION:=8.051.02
|
||||
PKG_RELEASE:=1
|
||||
|
||||
PKG_SOURCE_PROTO:=git
|
||||
PKG_SOURCE_URL:=https://github.com/BROBIRD/openwrt-r8168.git
|
||||
PKG_SOURCE_VERSION:=ddfaceacd1b7ed2857fb995642a8ffb1fc37e989
|
||||
PKG_MIRROR_HASH:=5428f60dc33e9503c6cfdf690c00077149dce24cbb0591129d905b9f1aad9202
|
||||
PKG_SOURCE_VERSION:=4f6cfe1ca12fb772deed57f1d2d1062af041ad07
|
||||
PKG_MIRROR_HASH:=6b149f5eb3b9e1dc50867a694984d253aa58d97dd5fbab30eb405d2d7b2be587
|
||||
|
||||
PKG_BUILD_DIR:=$(KERNEL_BUILD_DIR)/$(PKG_NAME)-$(PKG_VERSION)
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
#include <linux/if_vlan.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/interrupt.h>
|
||||
@@ -24643,6 +24644,22 @@ rtl8168_set_bios_setting(struct net_devi
|
||||
@@ -24769,6 +24770,22 @@ rtl8168_set_bios_setting(struct net_devi
|
||||
}
|
||||
}
|
||||
|
||||
@ -31,7 +31,7 @@
|
||||
static void
|
||||
rtl8168_init_software_variable(struct net_device *dev)
|
||||
{
|
||||
@@ -25206,6 +25223,8 @@ rtl8168_init_software_variable(struct ne
|
||||
@@ -25343,6 +25360,8 @@ rtl8168_init_software_variable(struct ne
|
||||
tp->NotWrMcuPatchCode = TRUE;
|
||||
}
|
||||
|
||||
|
@ -1,47 +0,0 @@
|
||||
--- a/src/r8168_n.c
|
||||
+++ b/src/r8168_n.c
|
||||
@@ -3715,7 +3715,11 @@
|
||||
txd->opts2 = 0;
|
||||
while (1) {
|
||||
memset(tmpAddr, pattern++, len - 14);
|
||||
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,18,0)
|
||||
pci_dma_sync_single_for_device(tp->pci_dev,
|
||||
+#else
|
||||
+ dma_sync_single_for_device(tp_to_dev(tp),
|
||||
+#endif
|
||||
le64_to_cpu(mapping),
|
||||
len, DMA_TO_DEVICE);
|
||||
txd->opts1 = cpu_to_le32(DescOwn | FirstFrag | LastFrag | len);
|
||||
@@ -3743,7 +3747,11 @@
|
||||
if (rx_len == len) {
|
||||
dma_sync_single_for_cpu(tp_to_dev(tp), le64_to_cpu(rxd->addr), tp->rx_buf_sz, DMA_FROM_DEVICE);
|
||||
i = memcmp(skb->data, rx_skb->data, rx_len);
|
||||
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,18,0)
|
||||
pci_dma_sync_single_for_device(tp->pci_dev, le64_to_cpu(rxd->addr), tp->rx_buf_sz, DMA_FROM_DEVICE);
|
||||
+#else
|
||||
+ dma_sync_single_for_device(tp_to_dev(tp), le64_to_cpu(rxd->addr), tp->rx_buf_sz, DMA_FROM_DEVICE);
|
||||
+#endif
|
||||
if (i == 0) {
|
||||
// dev_printk(KERN_INFO, tp_to_dev(tp), "loopback test finished\n",rx_len,len);
|
||||
break;
|
||||
@@ -26464,11 +26472,20 @@
|
||||
|
||||
if ((sizeof(dma_addr_t) > 4) &&
|
||||
use_dac &&
|
||||
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,18,0)
|
||||
!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) &&
|
||||
!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
|
||||
+#else
|
||||
+ !dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) &&
|
||||
+ !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) {
|
||||
+#endif
|
||||
dev->features |= NETIF_F_HIGHDMA;
|
||||
} else {
|
||||
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,18,0)
|
||||
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
+#else
|
||||
+ rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
|
||||
+#endif
|
||||
if (rc < 0) {
|
||||
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0)
|
||||
if (netif_msg_probe(tp))
|
14
package/kernel/r8168/patches/030-6.1-support.patch
Normal file
14
package/kernel/r8168/patches/030-6.1-support.patch
Normal file
@ -0,0 +1,14 @@
|
||||
--- a/src/r8168.h
|
||||
+++ b/src/r8168.h
|
||||
@@ -566,7 +566,11 @@
|
||||
typedef struct napi_struct *napi_ptr;
|
||||
typedef int napi_budget;
|
||||
|
||||
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 1, 0)
|
||||
+#define RTL_NAPI_CONFIG(ndev, priv, function, weight) netif_napi_add_weight(ndev, &priv->napi, function, weight)
|
||||
+#else
|
||||
#define RTL_NAPI_CONFIG(ndev, priv, function, weight) netif_napi_add(ndev, &priv->napi, function, weight)
|
||||
+#endif
|
||||
#define RTL_NAPI_QUOTA(budget, ndev) min(budget, budget)
|
||||
#define RTL_GET_PRIV(stuct_ptr, priv_struct) container_of(stuct_ptr, priv_struct, stuct_ptr)
|
||||
#define RTL_GET_NETDEV(priv_ptr) struct net_device *dev = priv_ptr->dev;
|
@ -1,25 +0,0 @@
|
||||
From ea9e2477624adaa40e8a553ef876f60ec8d3150c Mon Sep 17 00:00:00 2001
|
||||
From: W_Y_CPP <383152993@qq.com>
|
||||
Date: Fri, 18 Feb 2022 00:53:12 -0500
|
||||
Subject: [PATCH] refresh
|
||||
|
||||
---
|
||||
xt_FULLCONENAT.c | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
diff --git a/xt_FULLCONENAT.c b/xt_FULLCONENAT.c
|
||||
index f96cfd8a3..237666039 100644
|
||||
--- a/xt_FULLCONENAT.c
|
||||
+++ b/xt_FULLCONENAT.c
|
||||
@@ -1258,8 +1258,7 @@ static int fullconenat_tg_check(const struct xt_tgchk_param *par)
|
||||
#endif
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) && !defined(CONFIG_NF_CONNTRACK_CHAIN_EVENTS)
|
||||
- if (!READ_ONCE(par->net->ct.nf_conntrack_event_cb)) {
|
||||
- nf_conntrack_register_notifier(par->net, &ct_event_notifier);
|
||||
+ if (!READ_ONCE(par->net->ct.nf_conntrack_event_cb)&&(nf_conntrack_register_notifier(par->net, &ct_event_notifier)==0)) {
|
||||
#else
|
||||
if (nf_conntrack_register_notifier(par->net, &ct_event_notifier) == 0) {
|
||||
#endif
|
||||
--
|
||||
2.17.1
|
@ -13,9 +13,9 @@ PKG_RELEASE:=2
|
||||
|
||||
PKG_SOURCE_PROTO:=git
|
||||
PKG_SOURCE_URL=$(PROJECT_GIT)/project/firewall3.git
|
||||
PKG_SOURCE_DATE:=2022-01-10
|
||||
PKG_SOURCE_VERSION:=0f16ea5f055722a532d4e68c7ba34ed084b48b37
|
||||
PKG_MIRROR_HASH:=219478ef95b170b5122030715eac7b3317f2ac4d67e1a936c22a78b10e056123
|
||||
PKG_SOURCE_DATE:=2021-03-23
|
||||
PKG_SOURCE_VERSION:=61db17edddb1f05e8107f0dbef6f7d060ce67483
|
||||
PKG_MIRROR_HASH:=b2eb09816640e14e2dae21fb54ea05c33858fe0004844fe8d99e541a2e19e9c0
|
||||
PKG_MAINTAINER:=Jo-Philipp Wich <jo@mein.io>
|
||||
PKG_LICENSE:=ISC
|
||||
|
||||
@ -59,4 +59,4 @@ define Package/firewall/install
|
||||
$(INSTALL_CONF) $(PKG_BUILD_DIR)/helpers.conf $(1)/usr/share/fw3
|
||||
endef
|
||||
|
||||
$(eval $(call BuildPackage,firewall))
|
||||
$(eval $(call BuildPackage,firewall))
|
||||
|
@ -3,7 +3,7 @@ config defaults
|
||||
option input ACCEPT
|
||||
option output ACCEPT
|
||||
option forward REJECT
|
||||
option fullcone 0
|
||||
option fullcone 2
|
||||
# Uncomment this line to disable ipv6 rules
|
||||
# option disable_ipv6 1
|
||||
|
||||
|
@ -0,0 +1,38 @@
|
||||
From df1306a96127e91ff2d513a0a67345baaf61d113 Mon Sep 17 00:00:00 2001
|
||||
From: Florian Eckert <fe@dev.tdt.de>
|
||||
Date: Fri, 19 Nov 2021 09:51:02 +0100
|
||||
Subject: [PATCH] firewall3: fix locking issue
|
||||
|
||||
By calling the command 'fw3 reload' several times at the same time, I
|
||||
noticed that the locking was not working properly. It happened from time
|
||||
to time that some firewall rules were present twice in the system!
|
||||
|
||||
By removing the 'unlink' systemcall, this error no longer occurred on my
|
||||
systems.
|
||||
|
||||
Since fw3 does not run as a service, it makes no sense to delete this
|
||||
lock file every time a filehandler is no longer open on this lock file,
|
||||
because fw3 binary is not running.
|
||||
|
||||
If fw3 does run as a service then we can remove this lock file on
|
||||
service stop. But this is not the case for fw3.
|
||||
|
||||
Signed-off-by: Florian Eckert <fe@dev.tdt.de>
|
||||
---
|
||||
utils.c | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
diff --git a/utils.c b/utils.c
|
||||
index 17d5bf9..92e966c 100644
|
||||
--- a/utils.c
|
||||
+++ b/utils.c
|
||||
@@ -397,7 +397,6 @@ fw3_unlock_path(int *fd, const char *lockpath)
|
||||
warn("Cannot release exclusive lock: %s", strerror(errno));
|
||||
|
||||
close(*fd);
|
||||
- unlink(FW3_LOCKFILE);
|
||||
|
||||
*fd = -1;
|
||||
}
|
||||
--
|
||||
2.30.2
|
@ -1,15 +1,14 @@
|
||||
#
|
||||
# Copyright (C) 2018 Chion Tang <tech@chionlab.moe>
|
||||
# Copyright (C) 2022 Chion Tang <tech@chionlab.moe>
|
||||
#
|
||||
# This is free software, licensed under the GNU General Public License v2.
|
||||
# See /LICENSE for more information.
|
||||
#
|
||||
|
||||
include $(TOPDIR)/rules.mk
|
||||
include $(INCLUDE_DIR)/kernel.mk
|
||||
|
||||
PKG_NAME:=fullconenat
|
||||
PKG_RELEASE:=6
|
||||
PKG_RELEASE:=9
|
||||
|
||||
PKG_SOURCE_DATE:=2022-02-13
|
||||
PKG_SOURCE_PROTO:=git
|
||||
@ -19,7 +18,9 @@ PKG_MIRROR_HASH:=00d749235271dee194dcd23c22e6e85207ea90192a62a110b2af0b4e4de1971
|
||||
|
||||
PKG_LICENSE:=GPL-2.0
|
||||
PKG_LICENSE_FILES:=LICENSE
|
||||
PKG_MAINTAINER:=Chion Tang <tech@chionlab.moe>
|
||||
|
||||
include $(INCLUDE_DIR)/kernel.mk
|
||||
include $(INCLUDE_DIR)/package.mk
|
||||
|
||||
define Package/iptables-mod-fullconenat
|
||||
@ -28,7 +29,6 @@ define Package/iptables-mod-fullconenat
|
||||
CATEGORY:=Network
|
||||
TITLE:=FULLCONENAT iptables extension
|
||||
DEPENDS:=+iptables +kmod-ipt-fullconenat
|
||||
MAINTAINER:=Chion Tang <tech@chionlab.moe>
|
||||
endef
|
||||
|
||||
define Package/iptables-mod-fullconenat/install
|
||||
@ -40,7 +40,6 @@ define KernelPackage/ipt-fullconenat
|
||||
SUBMENU:=Netfilter Extensions
|
||||
TITLE:=FULLCONENAT netfilter module
|
||||
DEPENDS:=+kmod-nf-ipt +kmod-nf-nat
|
||||
MAINTAINER:=Chion Tang <tech@chionlab.moe>
|
||||
KCONFIG:= \
|
||||
CONFIG_NF_CONNTRACK_EVENTS=y \
|
||||
CONFIG_NF_CONNTRACK_CHAIN_EVENTS=y
|
||||
@ -49,20 +48,15 @@ endef
|
||||
|
||||
include $(INCLUDE_DIR)/kernel-defaults.mk
|
||||
|
||||
define Build/Prepare
|
||||
$(call Build/Prepare/Default)
|
||||
$(CP) ./files/Makefile $(PKG_BUILD_DIR)/
|
||||
endef
|
||||
|
||||
define Build/Compile
|
||||
+$(MAKE) $(PKG_JOBS) -C "$(LINUX_DIR)" \
|
||||
CROSS_COMPILE="$(TARGET_CROSS)" \
|
||||
ARCH="$(LINUX_KARCH)" \
|
||||
M="$(PKG_BUILD_DIR)" \
|
||||
EXTRA_CFLAGS="$(BUILDFLAGS)" \
|
||||
modules
|
||||
CROSS_COMPILE="$(TARGET_CROSS)" \
|
||||
ARCH="$(LINUX_KARCH)" \
|
||||
M="$(PKG_BUILD_DIR)" \
|
||||
EXTRA_CFLAGS="$(BUILDFLAGS)" \
|
||||
modules
|
||||
$(call Build/Compile/Default)
|
||||
endef
|
||||
|
||||
$(eval $(call BuildPackage,iptables-mod-fullconenat))
|
||||
$(eval $(call KernelPackage,ipt-fullconenat))
|
||||
$(eval $(call BuildPackage,iptables-mod-fullconenat))
|
@ -0,0 +1,26 @@
|
||||
--- a/xt_FULLCONENAT.c
|
||||
+++ b/xt_FULLCONENAT.c
|
||||
@@ -325,7 +325,11 @@
|
||||
/* for now we do the same thing for both --random and --random-fully */
|
||||
|
||||
/* select a random starting point */
|
||||
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 1, 0)
|
||||
+ start = (uint16_t)(get_random_u32() % (u32)range_size);
|
||||
+#else
|
||||
start = (uint16_t)(prandom_u32() % (u32)range_size);
|
||||
+#endif
|
||||
} else {
|
||||
|
||||
if ((original_port >= min && original_port <= min + range_size - 1)
|
||||
@@ -995,7 +999,11 @@
|
||||
/* for now we do the same thing for both --random and --random-fully */
|
||||
|
||||
/* select a random starting point */
|
||||
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 1, 0)
|
||||
+ start = (uint16_t)(get_random_u32() % (u32)range_size);
|
||||
+#else
|
||||
start = (uint16_t)(prandom_u32() % (u32)range_size);
|
||||
+#endif
|
||||
} else {
|
||||
|
||||
if ((original_port >= min && original_port <= min + range_size - 1)
|
@ -222,19 +222,6 @@ iptables extensions for extra NAT targets.
|
||||
- NETMAP
|
||||
endef
|
||||
|
||||
define Package/iptables-mod-ulog
|
||||
$(call Package/iptables/Module, +kmod-ipt-ulog)
|
||||
TITLE:=user-space packet logging
|
||||
endef
|
||||
|
||||
define Package/iptables-mod-ulog/description
|
||||
iptables extensions for user-space packet logging.
|
||||
|
||||
Targets:
|
||||
- ULOG
|
||||
|
||||
endef
|
||||
|
||||
define Package/iptables-mod-nflog
|
||||
$(call Package/iptables/Module, +kmod-nfnetlink-log +kmod-ipt-nflog)
|
||||
TITLE:=Netfilter NFLOG target
|
||||
@ -674,7 +661,6 @@ $(eval $(call BuildPlugin,iptables-mod-nat-extra,$(IPT_NAT_EXTRA-m)))
|
||||
$(eval $(call BuildPlugin,iptables-mod-iprange,$(IPT_IPRANGE-m)))
|
||||
$(eval $(call BuildPlugin,iptables-mod-cluster,$(IPT_CLUSTER-m)))
|
||||
$(eval $(call BuildPlugin,iptables-mod-clusterip,$(IPT_CLUSTERIP-m)))
|
||||
$(eval $(call BuildPlugin,iptables-mod-ulog,$(IPT_ULOG-m)))
|
||||
$(eval $(call BuildPlugin,iptables-mod-hashlimit,$(IPT_HASHLIMIT-m)))
|
||||
$(eval $(call BuildPlugin,iptables-mod-rpfilter,$(IPT_RPFILTER-m)))
|
||||
$(eval $(call BuildPlugin,iptables-mod-led,$(IPT_LED-m)))
|
||||
|
@ -1,75 +0,0 @@
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1386,16 +1386,6 @@ config BOOT_CONFIG_EMBED_FILE
|
||||
This bootconfig will be used if there is no initrd or no other
|
||||
bootconfig in the initrd.
|
||||
|
||||
-config INITRAMFS_PRESERVE_MTIME
|
||||
- bool "Preserve cpio archive mtimes in initramfs"
|
||||
- default y
|
||||
- help
|
||||
- Each entry in an initramfs cpio archive carries an mtime value. When
|
||||
- enabled, extracted cpio items take this mtime, with directory mtime
|
||||
- setting deferred until after creation of any child entries.
|
||||
-
|
||||
- If unsure, say Y.
|
||||
-
|
||||
choice
|
||||
prompt "Compiler optimization level"
|
||||
default CC_OPTIMIZE_FOR_PERFORMANCE
|
||||
--- a/init/initramfs.c
|
||||
+++ b/init/initramfs.c
|
||||
@@ -127,17 +127,15 @@ static void __init free_hash(void)
|
||||
}
|
||||
}
|
||||
|
||||
-#ifdef CONFIG_INITRAMFS_PRESERVE_MTIME
|
||||
-static void __init do_utime(char *filename, time64_t mtime)
|
||||
+static long __init do_utime(char *filename, time64_t mtime)
|
||||
{
|
||||
- struct timespec64 t[2] = { { .tv_sec = mtime }, { .tv_sec = mtime } };
|
||||
- init_utimes(filename, t);
|
||||
-}
|
||||
+ struct timespec64 t[2];
|
||||
|
||||
-static void __init do_utime_path(const struct path *path, time64_t mtime)
|
||||
-{
|
||||
- struct timespec64 t[2] = { { .tv_sec = mtime }, { .tv_sec = mtime } };
|
||||
- vfs_utimes(path, t);
|
||||
+ t[0].tv_sec = mtime;
|
||||
+ t[0].tv_nsec = 0;
|
||||
+ t[1].tv_sec = mtime;
|
||||
+ t[1].tv_nsec = 0;
|
||||
+ return init_utimes(filename, t);
|
||||
}
|
||||
|
||||
static __initdata LIST_HEAD(dir_list);
|
||||
@@ -170,12 +168,6 @@ static void __init dir_utime(void)
|
||||
kfree(de);
|
||||
}
|
||||
}
|
||||
-#else
|
||||
-static void __init do_utime(char *filename, time64_t mtime) {}
|
||||
-static void __init do_utime_path(const struct path *path, time64_t mtime) {}
|
||||
-static void __init dir_add(const char *name, time64_t mtime) {}
|
||||
-static void __init dir_utime(void) {}
|
||||
-#endif
|
||||
|
||||
static __initdata time64_t mtime;
|
||||
|
||||
@@ -407,10 +399,14 @@ static int __init do_name(void)
|
||||
static int __init do_copy(void)
|
||||
{
|
||||
if (byte_count >= body_len) {
|
||||
+ struct timespec64 t[2] = { };
|
||||
if (xwrite(wfile, victim, body_len, &wfile_pos) != body_len)
|
||||
error("write error");
|
||||
|
||||
- do_utime_path(&wfile->f_path, mtime);
|
||||
+ t[0].tv_sec = mtime;
|
||||
+ t[1].tv_sec = mtime;
|
||||
+ vfs_utimes(&wfile->f_path, t);
|
||||
+
|
||||
fput(wfile);
|
||||
if (csum_present && io_csum != hdr_csum)
|
||||
error("bad data checksum");
|
@ -1,122 +0,0 @@
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -2133,8 +2133,6 @@ struct net_device {
|
||||
|
||||
/* Protocol-specific pointers */
|
||||
|
||||
- struct in_device __rcu *ip_ptr;
|
||||
- struct inet6_dev __rcu *ip6_ptr;
|
||||
#if IS_ENABLED(CONFIG_VLAN_8021Q)
|
||||
struct vlan_info __rcu *vlan_info;
|
||||
#endif
|
||||
@@ -2147,18 +2145,16 @@ struct net_device {
|
||||
#if IS_ENABLED(CONFIG_ATALK)
|
||||
void *atalk_ptr;
|
||||
#endif
|
||||
+ struct in_device __rcu *ip_ptr;
|
||||
#if IS_ENABLED(CONFIG_DECNET)
|
||||
struct dn_dev __rcu *dn_ptr;
|
||||
#endif
|
||||
+ struct inet6_dev __rcu *ip6_ptr;
|
||||
#if IS_ENABLED(CONFIG_AX25)
|
||||
void *ax25_ptr;
|
||||
#endif
|
||||
-#if IS_ENABLED(CONFIG_CFG80211)
|
||||
struct wireless_dev *ieee80211_ptr;
|
||||
-#endif
|
||||
-#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN)
|
||||
struct wpan_dev *ieee802154_ptr;
|
||||
-#endif
|
||||
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
|
||||
struct mpls_dev __rcu *mpls_ptr;
|
||||
#endif
|
||||
--- a/include/net/cfg80211.h
|
||||
+++ b/include/net/cfg80211.h
|
||||
@@ -8379,9 +8379,7 @@ int cfg80211_register_netdevice(struct n
|
||||
*/
|
||||
static inline void cfg80211_unregister_netdevice(struct net_device *dev)
|
||||
{
|
||||
-#if IS_ENABLED(CONFIG_CFG80211)
|
||||
cfg80211_unregister_wdev(dev->ieee80211_ptr);
|
||||
-#endif
|
||||
}
|
||||
|
||||
/**
|
||||
--- a/include/net/cfg802154.h
|
||||
+++ b/include/net/cfg802154.h
|
||||
@@ -373,7 +373,6 @@ struct wpan_dev {
|
||||
|
||||
#define to_phy(_dev) container_of(_dev, struct wpan_phy, dev)
|
||||
|
||||
-#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN)
|
||||
static inline int
|
||||
wpan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
|
||||
const struct ieee802154_addr *daddr,
|
||||
@@ -384,7 +383,6 @@ wpan_dev_hard_header(struct sk_buff *skb
|
||||
|
||||
return wpan_dev->header_ops->create(skb, dev, daddr, saddr, len);
|
||||
}
|
||||
-#endif
|
||||
|
||||
struct wpan_phy *
|
||||
wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size);
|
||||
--- a/net/batman-adv/hard-interface.c
|
||||
+++ b/net/batman-adv/hard-interface.c
|
||||
@@ -308,11 +308,9 @@ static bool batadv_is_cfg80211_netdev(st
|
||||
if (!net_device)
|
||||
return false;
|
||||
|
||||
-#if IS_ENABLED(CONFIG_CFG80211)
|
||||
/* cfg80211 drivers have to set ieee80211_ptr */
|
||||
if (net_device->ieee80211_ptr)
|
||||
return true;
|
||||
-#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
--- a/net/core/net-sysfs.c
|
||||
+++ b/net/core/net-sysfs.c
|
||||
@@ -747,6 +747,7 @@ static const struct attribute_group nets
|
||||
.attrs = netstat_attrs,
|
||||
};
|
||||
|
||||
+#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
|
||||
static struct attribute *wireless_attrs[] = {
|
||||
NULL
|
||||
};
|
||||
@@ -755,19 +756,7 @@ static const struct attribute_group wire
|
||||
.name = "wireless",
|
||||
.attrs = wireless_attrs,
|
||||
};
|
||||
-
|
||||
-static bool wireless_group_needed(struct net_device *ndev)
|
||||
-{
|
||||
-#if IS_ENABLED(CONFIG_CFG80211)
|
||||
- if (ndev->ieee80211_ptr)
|
||||
- return true;
|
||||
#endif
|
||||
-#if IS_ENABLED(CONFIG_WIRELESS_EXT)
|
||||
- if (ndev->wireless_handlers)
|
||||
- return true;
|
||||
-#endif
|
||||
- return false;
|
||||
-}
|
||||
|
||||
#else /* CONFIG_SYSFS */
|
||||
#define net_class_groups NULL
|
||||
@@ -2008,8 +1997,14 @@ int netdev_register_kobject(struct net_d
|
||||
|
||||
*groups++ = &netstat_group;
|
||||
|
||||
- if (wireless_group_needed(ndev))
|
||||
+#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
|
||||
+ if (ndev->ieee80211_ptr)
|
||||
+ *groups++ = &wireless_group;
|
||||
+#if IS_ENABLED(CONFIG_WIRELESS_EXT)
|
||||
+ else if (ndev->wireless_handlers)
|
||||
*groups++ = &wireless_group;
|
||||
+#endif
|
||||
+#endif
|
||||
#endif /* CONFIG_SYSFS */
|
||||
|
||||
error = device_add(dev);
|
@ -1,143 +0,0 @@
|
||||
From e3264035bdac67898d685423ffb2f3a9c3a5964a Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Wed, 4 Aug 2021 01:31:34 -0600
|
||||
Subject: [PATCH 01/14] mm: x86, arm64: add arch_has_hw_pte_young()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Some architectures automatically set the accessed bit in PTEs, e.g.,
|
||||
x86 and arm64 v8.2. On architectures that do not have this capability,
|
||||
clearing the accessed bit in a PTE usually triggers a page fault
|
||||
following the TLB miss of this PTE (to emulate the accessed bit).
|
||||
|
||||
Being aware of this capability can help make better decisions, e.g.,
|
||||
whether to spread the work out over a period of time to reduce bursty
|
||||
page faults when trying to clear the accessed bit in many PTEs.
|
||||
|
||||
Note that theoretically this capability can be unreliable, e.g.,
|
||||
hotplugged CPUs might be different from builtin ones. Therefore it
|
||||
should not be used in architecture-independent code that involves
|
||||
correctness, e.g., to determine whether TLB flushes are required (in
|
||||
combination with the accessed bit).
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Reviewed-by: Barry Song <baohua@kernel.org>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Acked-by: Will Deacon <will@kernel.org>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: Ib49b44fb56df3333a2ff1fcc496fb1980b976e7a
|
||||
---
|
||||
arch/arm64/include/asm/pgtable.h | 15 ++-------------
|
||||
arch/x86/include/asm/pgtable.h | 6 +++---
|
||||
include/linux/pgtable.h | 13 +++++++++++++
|
||||
mm/memory.c | 14 +-------------
|
||||
4 files changed, 19 insertions(+), 29 deletions(-)
|
||||
|
||||
--- a/arch/arm64/include/asm/pgtable.h
|
||||
+++ b/arch/arm64/include/asm/pgtable.h
|
||||
@@ -1082,24 +1082,13 @@ static inline void update_mmu_cache(stru
|
||||
* page after fork() + CoW for pfn mappings. We don't always have a
|
||||
* hardware-managed access flag on arm64.
|
||||
*/
|
||||
-static inline bool arch_faults_on_old_pte(void)
|
||||
-{
|
||||
- /* The register read below requires a stable CPU to make any sense */
|
||||
- cant_migrate();
|
||||
-
|
||||
- return !cpu_has_hw_af();
|
||||
-}
|
||||
-#define arch_faults_on_old_pte arch_faults_on_old_pte
|
||||
+#define arch_has_hw_pte_young cpu_has_hw_af
|
||||
|
||||
/*
|
||||
* Experimentally, it's cheap to set the access flag in hardware and we
|
||||
* benefit from prefaulting mappings as 'old' to start with.
|
||||
*/
|
||||
-static inline bool arch_wants_old_prefaulted_pte(void)
|
||||
-{
|
||||
- return !arch_faults_on_old_pte();
|
||||
-}
|
||||
-#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte
|
||||
+#define arch_wants_old_prefaulted_pte cpu_has_hw_af
|
||||
|
||||
static inline bool pud_sect_supported(void)
|
||||
{
|
||||
--- a/arch/x86/include/asm/pgtable.h
|
||||
+++ b/arch/x86/include/asm/pgtable.h
|
||||
@@ -1431,10 +1431,10 @@ static inline bool arch_has_pfn_modify_c
|
||||
return boot_cpu_has_bug(X86_BUG_L1TF);
|
||||
}
|
||||
|
||||
-#define arch_faults_on_old_pte arch_faults_on_old_pte
|
||||
-static inline bool arch_faults_on_old_pte(void)
|
||||
+#define arch_has_hw_pte_young arch_has_hw_pte_young
|
||||
+static inline bool arch_has_hw_pte_young(void)
|
||||
{
|
||||
- return false;
|
||||
+ return true;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PAGE_TABLE_CHECK
|
||||
--- a/include/linux/pgtable.h
|
||||
+++ b/include/linux/pgtable.h
|
||||
@@ -260,6 +260,19 @@ static inline int pmdp_clear_flush_young
|
||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
#endif
|
||||
|
||||
+#ifndef arch_has_hw_pte_young
|
||||
+/*
|
||||
+ * Return whether the accessed bit is supported on the local CPU.
|
||||
+ *
|
||||
+ * This stub assumes accessing through an old PTE triggers a page fault.
|
||||
+ * Architectures that automatically set the accessed bit should override it.
|
||||
+ */
|
||||
+static inline bool arch_has_hw_pte_young(void)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
|
||||
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
|
||||
unsigned long address,
|
||||
--- a/mm/memory.c
|
||||
+++ b/mm/memory.c
|
||||
@@ -125,18 +125,6 @@ int randomize_va_space __read_mostly =
|
||||
2;
|
||||
#endif
|
||||
|
||||
-#ifndef arch_faults_on_old_pte
|
||||
-static inline bool arch_faults_on_old_pte(void)
|
||||
-{
|
||||
- /*
|
||||
- * Those arches which don't have hw access flag feature need to
|
||||
- * implement their own helper. By default, "true" means pagefault
|
||||
- * will be hit on old pte.
|
||||
- */
|
||||
- return true;
|
||||
-}
|
||||
-#endif
|
||||
-
|
||||
#ifndef arch_wants_old_prefaulted_pte
|
||||
static inline bool arch_wants_old_prefaulted_pte(void)
|
||||
{
|
||||
@@ -2872,7 +2860,7 @@ static inline bool __wp_page_copy_user(s
|
||||
* On architectures with software "accessed" bits, we would
|
||||
* take a double page fault, so mark it accessed here.
|
||||
*/
|
||||
- if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
|
||||
+ if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) {
|
||||
pte_t entry;
|
||||
|
||||
vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
|
@ -1,132 +0,0 @@
|
||||
From 0c0016e6f53b52166fe4da61c81fa6b27f4650cd Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Sat, 26 Sep 2020 21:17:18 -0600
|
||||
Subject: [PATCH 02/14] mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Some architectures support the accessed bit in non-leaf PMD entries,
|
||||
e.g., x86 sets the accessed bit in a non-leaf PMD entry when using it
|
||||
as part of linear address translation [1]. Page table walkers that
|
||||
clear the accessed bit may use this capability to reduce their search
|
||||
space.
|
||||
|
||||
Note that:
|
||||
1. Although an inline function is preferable, this capability is added
|
||||
as a configuration option for consistency with the existing macros.
|
||||
2. Due to the little interest in other varieties, this capability was
|
||||
only tested on Intel and AMD CPUs.
|
||||
|
||||
Thanks to the following developers for their efforts [2][3].
|
||||
Randy Dunlap <rdunlap@infradead.org>
|
||||
Stephen Rothwell <sfr@canb.auug.org.au>
|
||||
|
||||
[1]: Intel 64 and IA-32 Architectures Software Developer's Manual
|
||||
Volume 3 (June 2021), section 4.8
|
||||
[2] https://lore.kernel.org/r/bfdcc7c8-922f-61a9-aa15-7e7250f04af7@infradead.org/
|
||||
[3] https://lore.kernel.org/r/20220413151513.5a0d7a7e@canb.auug.org.au/
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Reviewed-by: Barry Song <baohua@kernel.org>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I1a17be3ae926f721f7b17ea1539e5c39e8c4f9a8
|
||||
---
|
||||
arch/Kconfig | 8 ++++++++
|
||||
arch/x86/Kconfig | 1 +
|
||||
arch/x86/include/asm/pgtable.h | 3 ++-
|
||||
arch/x86/mm/pgtable.c | 5 ++++-
|
||||
include/linux/pgtable.h | 4 ++--
|
||||
5 files changed, 17 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/arch/Kconfig
|
||||
+++ b/arch/Kconfig
|
||||
@@ -1418,6 +1418,14 @@ config DYNAMIC_SIGFRAME
|
||||
config HAVE_ARCH_NODE_DEV_GROUP
|
||||
bool
|
||||
|
||||
+config ARCH_HAS_NONLEAF_PMD_YOUNG
|
||||
+ bool
|
||||
+ help
|
||||
+ Architectures that select this option are capable of setting the
|
||||
+ accessed bit in non-leaf PMD entries when using them as part of linear
|
||||
+ address translations. Page table walkers that clear the accessed bit
|
||||
+ may use this capability to reduce their search space.
|
||||
+
|
||||
source "kernel/gcov/Kconfig"
|
||||
|
||||
source "scripts/gcc-plugins/Kconfig"
|
||||
--- a/arch/x86/Kconfig
|
||||
+++ b/arch/x86/Kconfig
|
||||
@@ -85,6 +85,7 @@ config X86
|
||||
select ARCH_HAS_PMEM_API if X86_64
|
||||
select ARCH_HAS_PTE_DEVMAP if X86_64
|
||||
select ARCH_HAS_PTE_SPECIAL
|
||||
+ select ARCH_HAS_NONLEAF_PMD_YOUNG if PGTABLE_LEVELS > 2
|
||||
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
|
||||
select ARCH_HAS_COPY_MC if X86_64
|
||||
select ARCH_HAS_SET_MEMORY
|
||||
--- a/arch/x86/include/asm/pgtable.h
|
||||
+++ b/arch/x86/include/asm/pgtable.h
|
||||
@@ -815,7 +815,8 @@ static inline unsigned long pmd_page_vad
|
||||
|
||||
static inline int pmd_bad(pmd_t pmd)
|
||||
{
|
||||
- return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
|
||||
+ return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) !=
|
||||
+ (_KERNPG_TABLE & ~_PAGE_ACCESSED);
|
||||
}
|
||||
|
||||
static inline unsigned long pages_to_mb(unsigned long npg)
|
||||
--- a/arch/x86/mm/pgtable.c
|
||||
+++ b/arch/x86/mm/pgtable.c
|
||||
@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_
|
||||
return ret;
|
||||
}
|
||||
|
||||
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
|
||||
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||
unsigned long addr, pmd_t *pmdp)
|
||||
{
|
||||
@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_
|
||||
|
||||
return ret;
|
||||
}
|
||||
+#endif
|
||||
+
|
||||
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
int pudp_test_and_clear_young(struct vm_area_struct *vma,
|
||||
unsigned long addr, pud_t *pudp)
|
||||
{
|
||||
--- a/include/linux/pgtable.h
|
||||
+++ b/include/linux/pgtable.h
|
||||
@@ -213,7 +213,7 @@ static inline int ptep_test_and_clear_yo
|
||||
#endif
|
||||
|
||||
#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
|
||||
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
|
||||
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
pmd_t *pmdp)
|
||||
@@ -234,7 +234,7 @@ static inline int pmdp_test_and_clear_yo
|
||||
BUILD_BUG();
|
||||
return 0;
|
||||
}
|
||||
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
|
||||
#endif
|
||||
|
||||
#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
@ -1,254 +0,0 @@
|
||||
From d8e0edcddc441574410a047ede56f79c849a6d37 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Sun, 27 Sep 2020 20:49:08 -0600
|
||||
Subject: [PATCH 03/14] mm/vmscan.c: refactor shrink_node()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This patch refactors shrink_node() to improve readability for the
|
||||
upcoming changes to mm/vmscan.c.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Reviewed-by: Barry Song <baohua@kernel.org>
|
||||
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: Iae734b5b4030205b7db6e8c841f747b6f6ae1a04
|
||||
---
|
||||
mm/vmscan.c | 198 +++++++++++++++++++++++++++-------------------------
|
||||
1 file changed, 104 insertions(+), 94 deletions(-)
|
||||
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -2728,6 +2728,109 @@ enum scan_balance {
|
||||
SCAN_FILE,
|
||||
};
|
||||
|
||||
+static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
|
||||
+{
|
||||
+ unsigned long file;
|
||||
+ struct lruvec *target_lruvec;
|
||||
+
|
||||
+ target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
|
||||
+
|
||||
+ /*
|
||||
+ * Flush the memory cgroup stats, so that we read accurate per-memcg
|
||||
+ * lruvec stats for heuristics.
|
||||
+ */
|
||||
+ mem_cgroup_flush_stats();
|
||||
+
|
||||
+ /*
|
||||
+ * Determine the scan balance between anon and file LRUs.
|
||||
+ */
|
||||
+ spin_lock_irq(&target_lruvec->lru_lock);
|
||||
+ sc->anon_cost = target_lruvec->anon_cost;
|
||||
+ sc->file_cost = target_lruvec->file_cost;
|
||||
+ spin_unlock_irq(&target_lruvec->lru_lock);
|
||||
+
|
||||
+ /*
|
||||
+ * Target desirable inactive:active list ratios for the anon
|
||||
+ * and file LRU lists.
|
||||
+ */
|
||||
+ if (!sc->force_deactivate) {
|
||||
+ unsigned long refaults;
|
||||
+
|
||||
+ refaults = lruvec_page_state(target_lruvec,
|
||||
+ WORKINGSET_ACTIVATE_ANON);
|
||||
+ if (refaults != target_lruvec->refaults[0] ||
|
||||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
|
||||
+ sc->may_deactivate |= DEACTIVATE_ANON;
|
||||
+ else
|
||||
+ sc->may_deactivate &= ~DEACTIVATE_ANON;
|
||||
+
|
||||
+ /*
|
||||
+ * When refaults are being observed, it means a new
|
||||
+ * workingset is being established. Deactivate to get
|
||||
+ * rid of any stale active pages quickly.
|
||||
+ */
|
||||
+ refaults = lruvec_page_state(target_lruvec,
|
||||
+ WORKINGSET_ACTIVATE_FILE);
|
||||
+ if (refaults != target_lruvec->refaults[1] ||
|
||||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
|
||||
+ sc->may_deactivate |= DEACTIVATE_FILE;
|
||||
+ else
|
||||
+ sc->may_deactivate &= ~DEACTIVATE_FILE;
|
||||
+ } else
|
||||
+ sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
|
||||
+
|
||||
+ /*
|
||||
+ * If we have plenty of inactive file pages that aren't
|
||||
+ * thrashing, try to reclaim those first before touching
|
||||
+ * anonymous pages.
|
||||
+ */
|
||||
+ file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
|
||||
+ if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
|
||||
+ sc->cache_trim_mode = 1;
|
||||
+ else
|
||||
+ sc->cache_trim_mode = 0;
|
||||
+
|
||||
+ /*
|
||||
+ * Prevent the reclaimer from falling into the cache trap: as
|
||||
+ * cache pages start out inactive, every cache fault will tip
|
||||
+ * the scan balance towards the file LRU. And as the file LRU
|
||||
+ * shrinks, so does the window for rotation from references.
|
||||
+ * This means we have a runaway feedback loop where a tiny
|
||||
+ * thrashing file LRU becomes infinitely more attractive than
|
||||
+ * anon pages. Try to detect this based on file LRU size.
|
||||
+ */
|
||||
+ if (!cgroup_reclaim(sc)) {
|
||||
+ unsigned long total_high_wmark = 0;
|
||||
+ unsigned long free, anon;
|
||||
+ int z;
|
||||
+
|
||||
+ free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
|
||||
+ file = node_page_state(pgdat, NR_ACTIVE_FILE) +
|
||||
+ node_page_state(pgdat, NR_INACTIVE_FILE);
|
||||
+
|
||||
+ for (z = 0; z < MAX_NR_ZONES; z++) {
|
||||
+ struct zone *zone = &pgdat->node_zones[z];
|
||||
+
|
||||
+ if (!managed_zone(zone))
|
||||
+ continue;
|
||||
+
|
||||
+ total_high_wmark += high_wmark_pages(zone);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Consider anon: if that's low too, this isn't a
|
||||
+ * runaway file reclaim problem, but rather just
|
||||
+ * extreme pressure. Reclaim as per usual then.
|
||||
+ */
|
||||
+ anon = node_page_state(pgdat, NR_INACTIVE_ANON);
|
||||
+
|
||||
+ sc->file_is_tiny =
|
||||
+ file + free <= total_high_wmark &&
|
||||
+ !(sc->may_deactivate & DEACTIVATE_ANON) &&
|
||||
+ anon >> sc->priority;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Determine how aggressively the anon and file LRU lists should be
|
||||
* scanned.
|
||||
@@ -3197,109 +3300,16 @@ static void shrink_node(pg_data_t *pgdat
|
||||
unsigned long nr_reclaimed, nr_scanned;
|
||||
struct lruvec *target_lruvec;
|
||||
bool reclaimable = false;
|
||||
- unsigned long file;
|
||||
|
||||
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
|
||||
|
||||
again:
|
||||
- /*
|
||||
- * Flush the memory cgroup stats, so that we read accurate per-memcg
|
||||
- * lruvec stats for heuristics.
|
||||
- */
|
||||
- mem_cgroup_flush_stats();
|
||||
-
|
||||
memset(&sc->nr, 0, sizeof(sc->nr));
|
||||
|
||||
nr_reclaimed = sc->nr_reclaimed;
|
||||
nr_scanned = sc->nr_scanned;
|
||||
|
||||
- /*
|
||||
- * Determine the scan balance between anon and file LRUs.
|
||||
- */
|
||||
- spin_lock_irq(&target_lruvec->lru_lock);
|
||||
- sc->anon_cost = target_lruvec->anon_cost;
|
||||
- sc->file_cost = target_lruvec->file_cost;
|
||||
- spin_unlock_irq(&target_lruvec->lru_lock);
|
||||
-
|
||||
- /*
|
||||
- * Target desirable inactive:active list ratios for the anon
|
||||
- * and file LRU lists.
|
||||
- */
|
||||
- if (!sc->force_deactivate) {
|
||||
- unsigned long refaults;
|
||||
-
|
||||
- refaults = lruvec_page_state(target_lruvec,
|
||||
- WORKINGSET_ACTIVATE_ANON);
|
||||
- if (refaults != target_lruvec->refaults[0] ||
|
||||
- inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
|
||||
- sc->may_deactivate |= DEACTIVATE_ANON;
|
||||
- else
|
||||
- sc->may_deactivate &= ~DEACTIVATE_ANON;
|
||||
-
|
||||
- /*
|
||||
- * When refaults are being observed, it means a new
|
||||
- * workingset is being established. Deactivate to get
|
||||
- * rid of any stale active pages quickly.
|
||||
- */
|
||||
- refaults = lruvec_page_state(target_lruvec,
|
||||
- WORKINGSET_ACTIVATE_FILE);
|
||||
- if (refaults != target_lruvec->refaults[1] ||
|
||||
- inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
|
||||
- sc->may_deactivate |= DEACTIVATE_FILE;
|
||||
- else
|
||||
- sc->may_deactivate &= ~DEACTIVATE_FILE;
|
||||
- } else
|
||||
- sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
|
||||
-
|
||||
- /*
|
||||
- * If we have plenty of inactive file pages that aren't
|
||||
- * thrashing, try to reclaim those first before touching
|
||||
- * anonymous pages.
|
||||
- */
|
||||
- file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
|
||||
- if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
|
||||
- sc->cache_trim_mode = 1;
|
||||
- else
|
||||
- sc->cache_trim_mode = 0;
|
||||
-
|
||||
- /*
|
||||
- * Prevent the reclaimer from falling into the cache trap: as
|
||||
- * cache pages start out inactive, every cache fault will tip
|
||||
- * the scan balance towards the file LRU. And as the file LRU
|
||||
- * shrinks, so does the window for rotation from references.
|
||||
- * This means we have a runaway feedback loop where a tiny
|
||||
- * thrashing file LRU becomes infinitely more attractive than
|
||||
- * anon pages. Try to detect this based on file LRU size.
|
||||
- */
|
||||
- if (!cgroup_reclaim(sc)) {
|
||||
- unsigned long total_high_wmark = 0;
|
||||
- unsigned long free, anon;
|
||||
- int z;
|
||||
-
|
||||
- free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
|
||||
- file = node_page_state(pgdat, NR_ACTIVE_FILE) +
|
||||
- node_page_state(pgdat, NR_INACTIVE_FILE);
|
||||
-
|
||||
- for (z = 0; z < MAX_NR_ZONES; z++) {
|
||||
- struct zone *zone = &pgdat->node_zones[z];
|
||||
- if (!managed_zone(zone))
|
||||
- continue;
|
||||
-
|
||||
- total_high_wmark += high_wmark_pages(zone);
|
||||
- }
|
||||
-
|
||||
- /*
|
||||
- * Consider anon: if that's low too, this isn't a
|
||||
- * runaway file reclaim problem, but rather just
|
||||
- * extreme pressure. Reclaim as per usual then.
|
||||
- */
|
||||
- anon = node_page_state(pgdat, NR_INACTIVE_ANON);
|
||||
-
|
||||
- sc->file_is_tiny =
|
||||
- file + free <= total_high_wmark &&
|
||||
- !(sc->may_deactivate & DEACTIVATE_ANON) &&
|
||||
- anon >> sc->priority;
|
||||
- }
|
||||
+ prepare_scan_count(pgdat, sc);
|
||||
|
||||
shrink_node_memcgs(pgdat, sc);
|
||||
|
@ -1,59 +0,0 @@
|
||||
From bc14d2c7c6d0fb8c79ad0fc5eab488b977cbcccf Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Sun, 6 Mar 2022 20:22:40 -0700
|
||||
Subject: [PATCH 04/14] Revert "include/linux/mm_inline.h: fold
|
||||
__update_lru_size() into its sole caller"
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This patch undoes the following refactor:
|
||||
commit 289ccba18af4 ("include/linux/mm_inline.h: fold __update_lru_size() into its sole caller")
|
||||
|
||||
The upcoming changes to include/linux/mm_inline.h will reuse
|
||||
__update_lru_size().
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I6155c407d50199a43b179c7f45904d4b7c052118
|
||||
---
|
||||
include/linux/mm_inline.h | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -34,7 +34,7 @@ static inline int page_is_file_lru(struc
|
||||
return folio_is_file_lru(page_folio(page));
|
||||
}
|
||||
|
||||
-static __always_inline void update_lru_size(struct lruvec *lruvec,
|
||||
+static __always_inline void __update_lru_size(struct lruvec *lruvec,
|
||||
enum lru_list lru, enum zone_type zid,
|
||||
long nr_pages)
|
||||
{
|
||||
@@ -43,6 +43,13 @@ static __always_inline void update_lru_s
|
||||
__mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
|
||||
__mod_zone_page_state(&pgdat->node_zones[zid],
|
||||
NR_ZONE_LRU_BASE + lru, nr_pages);
|
||||
+}
|
||||
+
|
||||
+static __always_inline void update_lru_size(struct lruvec *lruvec,
|
||||
+ enum lru_list lru, enum zone_type zid,
|
||||
+ long nr_pages)
|
||||
+{
|
||||
+ __update_lru_size(lruvec, lru, zid, nr_pages);
|
||||
#ifdef CONFIG_MEMCG
|
||||
mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
|
||||
#endif
|
@ -1,777 +0,0 @@
|
||||
From 8c6beb4548c216da9dae5e1a7612a108396e3f9e Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Mon, 25 Jan 2021 21:12:33 -0700
|
||||
Subject: [PATCH 05/14] mm: multi-gen LRU: groundwork
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Evictable pages are divided into multiple generations for each lruvec.
|
||||
The youngest generation number is stored in lrugen->max_seq for both
|
||||
anon and file types as they are aged on an equal footing. The oldest
|
||||
generation numbers are stored in lrugen->min_seq[] separately for anon
|
||||
and file types as clean file pages can be evicted regardless of swap
|
||||
constraints. These three variables are monotonically increasing.
|
||||
|
||||
Generation numbers are truncated into order_base_2(MAX_NR_GENS+1) bits
|
||||
in order to fit into the gen counter in folio->flags. Each truncated
|
||||
generation number is an index to lrugen->lists[]. The sliding window
|
||||
technique is used to track at least MIN_NR_GENS and at most
|
||||
MAX_NR_GENS generations. The gen counter stores a value within [1,
|
||||
MAX_NR_GENS] while a page is on one of lrugen->lists[]. Otherwise it
|
||||
stores 0.
|
||||
|
||||
There are two conceptually independent procedures: "the aging", which
|
||||
produces young generations, and "the eviction", which consumes old
|
||||
generations. They form a closed-loop system, i.e., "the page reclaim".
|
||||
Both procedures can be invoked from userspace for the purposes of
|
||||
working set estimation and proactive reclaim. These techniques are
|
||||
commonly used to optimize job scheduling (bin packing) in data
|
||||
centers [1][2].
|
||||
|
||||
To avoid confusion, the terms "hot" and "cold" will be applied to the
|
||||
multi-gen LRU, as a new convention; the terms "active" and "inactive"
|
||||
will be applied to the active/inactive LRU, as usual.
|
||||
|
||||
The protection of hot pages and the selection of cold pages are based
|
||||
on page access channels and patterns. There are two access channels:
|
||||
one through page tables and the other through file descriptors. The
|
||||
protection of the former channel is by design stronger because:
|
||||
1. The uncertainty in determining the access patterns of the former
|
||||
channel is higher due to the approximation of the accessed bit.
|
||||
2. The cost of evicting the former channel is higher due to the TLB
|
||||
flushes required and the likelihood of encountering the dirty bit.
|
||||
3. The penalty of underprotecting the former channel is higher because
|
||||
applications usually do not prepare themselves for major page
|
||||
faults like they do for blocked I/O. E.g., GUI applications
|
||||
commonly use dedicated I/O threads to avoid blocking rendering
|
||||
threads.
|
||||
There are also two access patterns: one with temporal locality and the
|
||||
other without. For the reasons listed above, the former channel is
|
||||
assumed to follow the former pattern unless VM_SEQ_READ or
|
||||
VM_RAND_READ is present; the latter channel is assumed to follow the
|
||||
latter pattern unless outlying refaults have been observed [3][4].
|
||||
|
||||
The next patch will address the "outlying refaults". Three macros,
|
||||
i.e., LRU_REFS_WIDTH, LRU_REFS_PGOFF and LRU_REFS_MASK, used later are
|
||||
added in this patch to make the entire patchset less diffy.
|
||||
|
||||
A page is added to the youngest generation on faulting. The aging
|
||||
needs to check the accessed bit at least twice before handing this
|
||||
page over to the eviction. The first check takes care of the accessed
|
||||
bit set on the initial fault; the second check makes sure this page
|
||||
has not been used since then. This protocol, AKA second chance,
|
||||
requires a minimum of two generations, hence MIN_NR_GENS.
|
||||
|
||||
[1] https://dl.acm.org/doi/10.1145/3297858.3304053
|
||||
[2] https://dl.acm.org/doi/10.1145/3503222.3507731
|
||||
[3] https://lwn.net/Articles/495543/
|
||||
[4] https://lwn.net/Articles/815342/
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I71de7cd15b8dfa6f9fdd838023474693c4fee0a7
|
||||
---
|
||||
fs/fuse/dev.c | 3 +-
|
||||
include/linux/mm_inline.h | 175 ++++++++++++++++++++++++++++++
|
||||
include/linux/mmzone.h | 102 +++++++++++++++++
|
||||
include/linux/page-flags-layout.h | 13 ++-
|
||||
include/linux/page-flags.h | 4 +-
|
||||
include/linux/sched.h | 4 +
|
||||
kernel/bounds.c | 5 +
|
||||
mm/Kconfig | 8 ++
|
||||
mm/huge_memory.c | 3 +-
|
||||
mm/memcontrol.c | 2 +
|
||||
mm/memory.c | 25 +++++
|
||||
mm/mm_init.c | 6 +-
|
||||
mm/mmzone.c | 2 +
|
||||
mm/swap.c | 11 +-
|
||||
mm/vmscan.c | 75 +++++++++++++
|
||||
15 files changed, 424 insertions(+), 14 deletions(-)
|
||||
|
||||
--- a/fs/fuse/dev.c
|
||||
+++ b/fs/fuse/dev.c
|
||||
@@ -776,7 +776,8 @@ static int fuse_check_page(struct page *
|
||||
1 << PG_active |
|
||||
1 << PG_workingset |
|
||||
1 << PG_reclaim |
|
||||
- 1 << PG_waiters))) {
|
||||
+ 1 << PG_waiters |
|
||||
+ LRU_GEN_MASK | LRU_REFS_MASK))) {
|
||||
dump_page(page, "fuse: trying to steal weird page");
|
||||
return 1;
|
||||
}
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -40,6 +40,9 @@ static __always_inline void __update_lru
|
||||
{
|
||||
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
||||
|
||||
+ lockdep_assert_held(&lruvec->lru_lock);
|
||||
+ WARN_ON_ONCE(nr_pages != (int)nr_pages);
|
||||
+
|
||||
__mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
|
||||
__mod_zone_page_state(&pgdat->node_zones[zid],
|
||||
NR_ZONE_LRU_BASE + lru, nr_pages);
|
||||
@@ -101,11 +104,177 @@ static __always_inline enum lru_list fol
|
||||
return lru;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+
|
||||
+static inline bool lru_gen_enabled(void)
|
||||
+{
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_in_fault(void)
|
||||
+{
|
||||
+ return current->in_lru_fault;
|
||||
+}
|
||||
+
|
||||
+static inline int lru_gen_from_seq(unsigned long seq)
|
||||
+{
|
||||
+ return seq % MAX_NR_GENS;
|
||||
+}
|
||||
+
|
||||
+static inline int folio_lru_gen(struct folio *folio)
|
||||
+{
|
||||
+ unsigned long flags = READ_ONCE(folio->flags);
|
||||
+
|
||||
+ return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
|
||||
+{
|
||||
+ unsigned long max_seq = lruvec->lrugen.max_seq;
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
|
||||
+
|
||||
+ /* see the comment on MIN_NR_GENS */
|
||||
+ return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1);
|
||||
+}
|
||||
+
|
||||
+static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *folio,
|
||||
+ int old_gen, int new_gen)
|
||||
+{
|
||||
+ int type = folio_is_file_lru(folio);
|
||||
+ int zone = folio_zonenum(folio);
|
||||
+ int delta = folio_nr_pages(folio);
|
||||
+ enum lru_list lru = type * LRU_INACTIVE_FILE;
|
||||
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS);
|
||||
+ VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS);
|
||||
+ VM_WARN_ON_ONCE(old_gen == -1 && new_gen == -1);
|
||||
+
|
||||
+ if (old_gen >= 0)
|
||||
+ WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone],
|
||||
+ lrugen->nr_pages[old_gen][type][zone] - delta);
|
||||
+ if (new_gen >= 0)
|
||||
+ WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone],
|
||||
+ lrugen->nr_pages[new_gen][type][zone] + delta);
|
||||
+
|
||||
+ /* addition */
|
||||
+ if (old_gen < 0) {
|
||||
+ if (lru_gen_is_active(lruvec, new_gen))
|
||||
+ lru += LRU_ACTIVE;
|
||||
+ __update_lru_size(lruvec, lru, zone, delta);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* deletion */
|
||||
+ if (new_gen < 0) {
|
||||
+ if (lru_gen_is_active(lruvec, old_gen))
|
||||
+ lru += LRU_ACTIVE;
|
||||
+ __update_lru_size(lruvec, lru, zone, -delta);
|
||||
+ return;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
+{
|
||||
+ unsigned long seq;
|
||||
+ unsigned long flags;
|
||||
+ int gen = folio_lru_gen(folio);
|
||||
+ int type = folio_is_file_lru(folio);
|
||||
+ int zone = folio_zonenum(folio);
|
||||
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+
|
||||
+ VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
|
||||
+
|
||||
+ if (folio_test_unevictable(folio))
|
||||
+ return false;
|
||||
+ /*
|
||||
+ * There are three common cases for this page:
|
||||
+ * 1. If it's hot, e.g., freshly faulted in or previously hot and
|
||||
+ * migrated, add it to the youngest generation.
|
||||
+ * 2. If it's cold but can't be evicted immediately, i.e., an anon page
|
||||
+ * not in swapcache or a dirty page pending writeback, add it to the
|
||||
+ * second oldest generation.
|
||||
+ * 3. Everything else (clean, cold) is added to the oldest generation.
|
||||
+ */
|
||||
+ if (folio_test_active(folio))
|
||||
+ seq = lrugen->max_seq;
|
||||
+ else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) ||
|
||||
+ (folio_test_reclaim(folio) &&
|
||||
+ (folio_test_dirty(folio) || folio_test_writeback(folio))))
|
||||
+ seq = lrugen->min_seq[type] + 1;
|
||||
+ else
|
||||
+ seq = lrugen->min_seq[type];
|
||||
+
|
||||
+ gen = lru_gen_from_seq(seq);
|
||||
+ flags = (gen + 1UL) << LRU_GEN_PGOFF;
|
||||
+ /* see the comment on MIN_NR_GENS about PG_active */
|
||||
+ set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags);
|
||||
+
|
||||
+ lru_gen_update_size(lruvec, folio, -1, gen);
|
||||
+ /* for folio_rotate_reclaimable() */
|
||||
+ if (reclaiming)
|
||||
+ list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]);
|
||||
+ else
|
||||
+ list_add(&folio->lru, &lrugen->lists[gen][type][zone]);
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
+{
|
||||
+ unsigned long flags;
|
||||
+ int gen = folio_lru_gen(folio);
|
||||
+
|
||||
+ if (gen < 0)
|
||||
+ return false;
|
||||
+
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
|
||||
+
|
||||
+ /* for folio_migrate_flags() */
|
||||
+ flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? BIT(PG_active) : 0;
|
||||
+ flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags);
|
||||
+ gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
||||
+
|
||||
+ lru_gen_update_size(lruvec, folio, gen, -1);
|
||||
+ list_del(&folio->lru);
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+#else /* !CONFIG_LRU_GEN */
|
||||
+
|
||||
+static inline bool lru_gen_enabled(void)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_in_fault(void)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+#endif /* CONFIG_LRU_GEN */
|
||||
+
|
||||
static __always_inline
|
||||
void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
|
||||
{
|
||||
enum lru_list lru = folio_lru_list(folio);
|
||||
|
||||
+ if (lru_gen_add_folio(lruvec, folio, false))
|
||||
+ return;
|
||||
+
|
||||
update_lru_size(lruvec, lru, folio_zonenum(folio),
|
||||
folio_nr_pages(folio));
|
||||
if (lru != LRU_UNEVICTABLE)
|
||||
@@ -123,6 +292,9 @@ void lruvec_add_folio_tail(struct lruvec
|
||||
{
|
||||
enum lru_list lru = folio_lru_list(folio);
|
||||
|
||||
+ if (lru_gen_add_folio(lruvec, folio, true))
|
||||
+ return;
|
||||
+
|
||||
update_lru_size(lruvec, lru, folio_zonenum(folio),
|
||||
folio_nr_pages(folio));
|
||||
/* This is not expected to be used on LRU_UNEVICTABLE */
|
||||
@@ -140,6 +312,9 @@ void lruvec_del_folio(struct lruvec *lru
|
||||
{
|
||||
enum lru_list lru = folio_lru_list(folio);
|
||||
|
||||
+ if (lru_gen_del_folio(lruvec, folio, false))
|
||||
+ return;
|
||||
+
|
||||
if (lru != LRU_UNEVICTABLE)
|
||||
list_del(&folio->lru);
|
||||
update_lru_size(lruvec, lru, folio_zonenum(folio),
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -314,6 +314,102 @@ enum lruvec_flags {
|
||||
*/
|
||||
};
|
||||
|
||||
+#endif /* !__GENERATING_BOUNDS_H */
|
||||
+
|
||||
+/*
|
||||
+ * Evictable pages are divided into multiple generations. The youngest and the
|
||||
+ * oldest generation numbers, max_seq and min_seq, are monotonically increasing.
|
||||
+ * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An
|
||||
+ * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the
|
||||
+ * corresponding generation. The gen counter in folio->flags stores gen+1 while
|
||||
+ * a page is on one of lrugen->lists[]. Otherwise it stores 0.
|
||||
+ *
|
||||
+ * A page is added to the youngest generation on faulting. The aging needs to
|
||||
+ * check the accessed bit at least twice before handing this page over to the
|
||||
+ * eviction. The first check takes care of the accessed bit set on the initial
|
||||
+ * fault; the second check makes sure this page hasn't been used since then.
|
||||
+ * This process, AKA second chance, requires a minimum of two generations,
|
||||
+ * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive
|
||||
+ * LRU, e.g., /proc/vmstat, these two generations are considered active; the
|
||||
+ * rest of generations, if they exist, are considered inactive. See
|
||||
+ * lru_gen_is_active().
|
||||
+ *
|
||||
+ * PG_active is always cleared while a page is on one of lrugen->lists[] so that
|
||||
+ * the aging does not need to worry about it. And it's set again when a page
|
||||
+ * considered active is isolated for non-reclaiming purposes, e.g., migration.
|
||||
+ * See lru_gen_add_folio() and lru_gen_del_folio().
|
||||
+ *
|
||||
+ * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the
|
||||
+ * number of categories of the active/inactive LRU when keeping track of
|
||||
+ * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits
|
||||
+ * in folio->flags.
|
||||
+ */
|
||||
+#define MIN_NR_GENS 2U
|
||||
+#define MAX_NR_GENS 4U
|
||||
+
|
||||
+#ifndef __GENERATING_BOUNDS_H
|
||||
+
|
||||
+struct lruvec;
|
||||
+
|
||||
+#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
|
||||
+#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
|
||||
+
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+
|
||||
+enum {
|
||||
+ LRU_GEN_ANON,
|
||||
+ LRU_GEN_FILE,
|
||||
+};
|
||||
+
|
||||
+/*
|
||||
+ * The youngest generation number is stored in max_seq for both anon and file
|
||||
+ * types as they are aged on an equal footing. The oldest generation numbers are
|
||||
+ * stored in min_seq[] separately for anon and file types as clean file pages
|
||||
+ * can be evicted regardless of swap constraints.
|
||||
+ *
|
||||
+ * Normally anon and file min_seq are in sync. But if swapping is constrained,
|
||||
+ * e.g., out of swap space, file min_seq is allowed to advance and leave anon
|
||||
+ * min_seq behind.
|
||||
+ *
|
||||
+ * The number of pages in each generation is eventually consistent and therefore
|
||||
+ * can be transiently negative.
|
||||
+ */
|
||||
+struct lru_gen_struct {
|
||||
+ /* the aging increments the youngest generation number */
|
||||
+ unsigned long max_seq;
|
||||
+ /* the eviction increments the oldest generation numbers */
|
||||
+ unsigned long min_seq[ANON_AND_FILE];
|
||||
+ /* the multi-gen LRU lists, lazily sorted on eviction */
|
||||
+ struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
|
||||
+ /* the multi-gen LRU sizes, eventually consistent */
|
||||
+ long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
|
||||
+};
|
||||
+
|
||||
+void lru_gen_init_lruvec(struct lruvec *lruvec);
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+void lru_gen_init_memcg(struct mem_cgroup *memcg);
|
||||
+void lru_gen_exit_memcg(struct mem_cgroup *memcg);
|
||||
+#endif
|
||||
+
|
||||
+#else /* !CONFIG_LRU_GEN */
|
||||
+
|
||||
+static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#endif /* CONFIG_LRU_GEN */
|
||||
+
|
||||
struct lruvec {
|
||||
struct list_head lists[NR_LRU_LISTS];
|
||||
/* per lruvec lru_lock for memcg */
|
||||
@@ -331,6 +427,10 @@ struct lruvec {
|
||||
unsigned long refaults[ANON_AND_FILE];
|
||||
/* Various lruvec state flags (enum lruvec_flags) */
|
||||
unsigned long flags;
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+ /* evictable pages divided into generations */
|
||||
+ struct lru_gen_struct lrugen;
|
||||
+#endif
|
||||
#ifdef CONFIG_MEMCG
|
||||
struct pglist_data *pgdat;
|
||||
#endif
|
||||
@@ -746,6 +846,8 @@ static inline bool zone_is_empty(struct
|
||||
#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH)
|
||||
#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH)
|
||||
#define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
|
||||
+#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH)
|
||||
+#define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH)
|
||||
|
||||
/*
|
||||
* Define the bit shifts to access each section. For non-existent
|
||||
--- a/include/linux/page-flags-layout.h
|
||||
+++ b/include/linux/page-flags-layout.h
|
||||
@@ -55,7 +55,8 @@
|
||||
#define SECTIONS_WIDTH 0
|
||||
#endif
|
||||
|
||||
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
|
||||
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \
|
||||
+ <= BITS_PER_LONG - NR_PAGEFLAGS
|
||||
#define NODES_WIDTH NODES_SHIFT
|
||||
#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
|
||||
#error "Vmemmap: No space for nodes field in page flags"
|
||||
@@ -89,8 +90,8 @@
|
||||
#define LAST_CPUPID_SHIFT 0
|
||||
#endif
|
||||
|
||||
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \
|
||||
- <= BITS_PER_LONG - NR_PAGEFLAGS
|
||||
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
|
||||
+ KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
|
||||
#define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
|
||||
#else
|
||||
#define LAST_CPUPID_WIDTH 0
|
||||
@@ -100,10 +101,12 @@
|
||||
#define LAST_CPUPID_NOT_IN_PAGE_FLAGS
|
||||
#endif
|
||||
|
||||
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \
|
||||
- > BITS_PER_LONG - NR_PAGEFLAGS
|
||||
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
|
||||
+ KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
|
||||
#error "Not enough bits in page flags"
|
||||
#endif
|
||||
|
||||
+#define LRU_REFS_WIDTH 0
|
||||
+
|
||||
#endif
|
||||
#endif /* _LINUX_PAGE_FLAGS_LAYOUT */
|
||||
--- a/include/linux/page-flags.h
|
||||
+++ b/include/linux/page-flags.h
|
||||
@@ -1058,7 +1058,7 @@ static __always_inline void __ClearPageA
|
||||
1UL << PG_private | 1UL << PG_private_2 | \
|
||||
1UL << PG_writeback | 1UL << PG_reserved | \
|
||||
1UL << PG_slab | 1UL << PG_active | \
|
||||
- 1UL << PG_unevictable | __PG_MLOCKED)
|
||||
+ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK)
|
||||
|
||||
/*
|
||||
* Flags checked when a page is prepped for return by the page allocator.
|
||||
@@ -1069,7 +1069,7 @@ static __always_inline void __ClearPageA
|
||||
* alloc-free cycle to prevent from reusing the page.
|
||||
*/
|
||||
#define PAGE_FLAGS_CHECK_AT_PREP \
|
||||
- (PAGEFLAGS_MASK & ~__PG_HWPOISON)
|
||||
+ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK)
|
||||
|
||||
#define PAGE_FLAGS_PRIVATE \
|
||||
(1UL << PG_private | 1UL << PG_private_2)
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -914,6 +914,10 @@ struct task_struct {
|
||||
#ifdef CONFIG_MEMCG
|
||||
unsigned in_user_fault:1;
|
||||
#endif
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+ /* whether the LRU algorithm may apply to this access */
|
||||
+ unsigned in_lru_fault:1;
|
||||
+#endif
|
||||
#ifdef CONFIG_COMPAT_BRK
|
||||
unsigned brk_randomized:1;
|
||||
#endif
|
||||
--- a/kernel/bounds.c
|
||||
+++ b/kernel/bounds.c
|
||||
@@ -22,6 +22,11 @@ int main(void)
|
||||
DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
|
||||
#endif
|
||||
DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+ DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1));
|
||||
+#else
|
||||
+ DEFINE(LRU_GEN_WIDTH, 0);
|
||||
+#endif
|
||||
/* End of constants */
|
||||
|
||||
return 0;
|
||||
--- a/mm/Kconfig
|
||||
+++ b/mm/Kconfig
|
||||
@@ -1124,6 +1124,14 @@ config PTE_MARKER_UFFD_WP
|
||||
purposes. It is required to enable userfaultfd write protection on
|
||||
file-backed memory types like shmem and hugetlbfs.
|
||||
|
||||
+config LRU_GEN
|
||||
+ bool "Multi-Gen LRU"
|
||||
+ depends on MMU
|
||||
+ # make sure folio->flags has enough spare bits
|
||||
+ depends on 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP
|
||||
+ help
|
||||
+ A high performance LRU implementation to overcommit memory.
|
||||
+
|
||||
source "mm/damon/Kconfig"
|
||||
|
||||
endmenu
|
||||
--- a/mm/huge_memory.c
|
||||
+++ b/mm/huge_memory.c
|
||||
@@ -2438,7 +2438,8 @@ static void __split_huge_page_tail(struc
|
||||
#ifdef CONFIG_64BIT
|
||||
(1L << PG_arch_2) |
|
||||
#endif
|
||||
- (1L << PG_dirty)));
|
||||
+ (1L << PG_dirty) |
|
||||
+ LRU_GEN_MASK | LRU_REFS_MASK));
|
||||
|
||||
/* ->mapping in first tail page is compound_mapcount */
|
||||
VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -5170,6 +5170,7 @@ static void __mem_cgroup_free(struct mem
|
||||
|
||||
static void mem_cgroup_free(struct mem_cgroup *memcg)
|
||||
{
|
||||
+ lru_gen_exit_memcg(memcg);
|
||||
memcg_wb_domain_exit(memcg);
|
||||
__mem_cgroup_free(memcg);
|
||||
}
|
||||
@@ -5228,6 +5229,7 @@ static struct mem_cgroup *mem_cgroup_all
|
||||
memcg->deferred_split_queue.split_queue_len = 0;
|
||||
#endif
|
||||
idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
|
||||
+ lru_gen_init_memcg(memcg);
|
||||
return memcg;
|
||||
fail:
|
||||
mem_cgroup_id_remove(memcg);
|
||||
--- a/mm/memory.c
|
||||
+++ b/mm/memory.c
|
||||
@@ -5110,6 +5110,27 @@ static inline void mm_account_fault(stru
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+static void lru_gen_enter_fault(struct vm_area_struct *vma)
|
||||
+{
|
||||
+ /* the LRU algorithm doesn't apply to sequential or random reads */
|
||||
+ current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_exit_fault(void)
|
||||
+{
|
||||
+ current->in_lru_fault = false;
|
||||
+}
|
||||
+#else
|
||||
+static void lru_gen_enter_fault(struct vm_area_struct *vma)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_exit_fault(void)
|
||||
+{
|
||||
+}
|
||||
+#endif /* CONFIG_LRU_GEN */
|
||||
+
|
||||
/*
|
||||
* By the time we get here, we already hold the mm semaphore
|
||||
*
|
||||
@@ -5141,11 +5162,15 @@ vm_fault_t handle_mm_fault(struct vm_are
|
||||
if (flags & FAULT_FLAG_USER)
|
||||
mem_cgroup_enter_user_fault();
|
||||
|
||||
+ lru_gen_enter_fault(vma);
|
||||
+
|
||||
if (unlikely(is_vm_hugetlb_page(vma)))
|
||||
ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
|
||||
else
|
||||
ret = __handle_mm_fault(vma, address, flags);
|
||||
|
||||
+ lru_gen_exit_fault();
|
||||
+
|
||||
if (flags & FAULT_FLAG_USER) {
|
||||
mem_cgroup_exit_user_fault();
|
||||
/*
|
||||
--- a/mm/mm_init.c
|
||||
+++ b/mm/mm_init.c
|
||||
@@ -65,14 +65,16 @@ void __init mminit_verify_pageflags_layo
|
||||
|
||||
shift = 8 * sizeof(unsigned long);
|
||||
width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH
|
||||
- - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH;
|
||||
+ - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH;
|
||||
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
|
||||
- "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n",
|
||||
+ "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n",
|
||||
SECTIONS_WIDTH,
|
||||
NODES_WIDTH,
|
||||
ZONES_WIDTH,
|
||||
LAST_CPUPID_WIDTH,
|
||||
KASAN_TAG_WIDTH,
|
||||
+ LRU_GEN_WIDTH,
|
||||
+ LRU_REFS_WIDTH,
|
||||
NR_PAGEFLAGS);
|
||||
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
|
||||
"Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n",
|
||||
--- a/mm/mmzone.c
|
||||
+++ b/mm/mmzone.c
|
||||
@@ -88,6 +88,8 @@ void lruvec_init(struct lruvec *lruvec)
|
||||
* Poison its list head, so that any operations on it would crash.
|
||||
*/
|
||||
list_del(&lruvec->lists[LRU_UNEVICTABLE]);
|
||||
+
|
||||
+ lru_gen_init_lruvec(lruvec);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS)
|
||||
--- a/mm/swap.c
|
||||
+++ b/mm/swap.c
|
||||
@@ -484,6 +484,11 @@ void folio_add_lru(struct folio *folio)
|
||||
folio_test_unevictable(folio), folio);
|
||||
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
|
||||
|
||||
+ /* see the comment in lru_gen_add_folio() */
|
||||
+ if (lru_gen_enabled() && !folio_test_unevictable(folio) &&
|
||||
+ lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
|
||||
+ folio_set_active(folio);
|
||||
+
|
||||
folio_get(folio);
|
||||
local_lock(&cpu_fbatches.lock);
|
||||
fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);
|
||||
@@ -575,7 +580,7 @@ static void lru_deactivate_file_fn(struc
|
||||
|
||||
static void lru_deactivate_fn(struct lruvec *lruvec, struct folio *folio)
|
||||
{
|
||||
- if (folio_test_active(folio) && !folio_test_unevictable(folio)) {
|
||||
+ if (!folio_test_unevictable(folio) && (folio_test_active(folio) || lru_gen_enabled())) {
|
||||
long nr_pages = folio_nr_pages(folio);
|
||||
|
||||
lruvec_del_folio(lruvec, folio);
|
||||
@@ -688,8 +693,8 @@ void deactivate_page(struct page *page)
|
||||
{
|
||||
struct folio *folio = page_folio(page);
|
||||
|
||||
- if (folio_test_lru(folio) && folio_test_active(folio) &&
|
||||
- !folio_test_unevictable(folio)) {
|
||||
+ if (folio_test_lru(folio) && !folio_test_unevictable(folio) &&
|
||||
+ (folio_test_active(folio) || lru_gen_enabled())) {
|
||||
struct folio_batch *fbatch;
|
||||
|
||||
folio_get(folio);
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -3050,6 +3050,81 @@ static bool can_age_anon_pages(struct pg
|
||||
return can_demote(pgdat->node_id, sc);
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+
|
||||
+/******************************************************************************
|
||||
+ * shorthand helpers
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+#define for_each_gen_type_zone(gen, type, zone) \
|
||||
+ for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \
|
||||
+ for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \
|
||||
+ for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
|
||||
+
|
||||
+static struct lruvec __maybe_unused *get_lruvec(struct mem_cgroup *memcg, int nid)
|
||||
+{
|
||||
+ struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ if (memcg) {
|
||||
+ struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
|
||||
+
|
||||
+ /* for hotadd_new_pgdat() */
|
||||
+ if (!lruvec->pgdat)
|
||||
+ lruvec->pgdat = pgdat;
|
||||
+
|
||||
+ return lruvec;
|
||||
+ }
|
||||
+#endif
|
||||
+ VM_WARN_ON_ONCE(!mem_cgroup_disabled());
|
||||
+
|
||||
+ return pgdat ? &pgdat->__lruvec : NULL;
|
||||
+}
|
||||
+
|
||||
+/******************************************************************************
|
||||
+ * initialization
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+void lru_gen_init_lruvec(struct lruvec *lruvec)
|
||||
+{
|
||||
+ int gen, type, zone;
|
||||
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+
|
||||
+ lrugen->max_seq = MIN_NR_GENS + 1;
|
||||
+
|
||||
+ for_each_gen_type_zone(gen, type, zone)
|
||||
+ INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
|
||||
+}
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+void lru_gen_init_memcg(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+void lru_gen_exit_memcg(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ int nid;
|
||||
+
|
||||
+ for_each_node(nid) {
|
||||
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
|
||||
+ sizeof(lruvec->lrugen.nr_pages)));
|
||||
+ }
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+static int __init init_lru_gen(void)
|
||||
+{
|
||||
+ BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
|
||||
+ BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
|
||||
+
|
||||
+ return 0;
|
||||
+};
|
||||
+late_initcall(init_lru_gen);
|
||||
+
|
||||
+#endif /* CONFIG_LRU_GEN */
|
||||
+
|
||||
static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
{
|
||||
unsigned long nr[NR_LRU_LISTS];
|
File diff suppressed because it is too large
Load Diff
@ -1,476 +0,0 @@
|
||||
From 93fa87bdef9e7fa9977355c4712c000f31639231 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Thu, 27 Jan 2022 20:43:22 -0700
|
||||
Subject: [PATCH 07/14] mm: multi-gen LRU: exploit locality in rmap
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Searching the rmap for PTEs mapping each page on an LRU list (to test
|
||||
and clear the accessed bit) can be expensive because pages from
|
||||
different VMAs (PA space) are not cache friendly to the rmap (VA
|
||||
space). For workloads mostly using mapped pages, searching the rmap
|
||||
can incur the highest CPU cost in the reclaim path.
|
||||
|
||||
This patch exploits spatial locality to reduce the trips into the
|
||||
rmap. When shrink_page_list() walks the rmap and finds a young PTE, a
|
||||
new function lru_gen_look_around() scans at most BITS_PER_LONG-1
|
||||
adjacent PTEs. On finding another young PTE, it clears the accessed
|
||||
bit and updates the gen counter of the page mapped by this PTE to
|
||||
(max_seq%MAX_NR_GENS)+1.
|
||||
|
||||
Server benchmark results:
|
||||
Single workload:
|
||||
fio (buffered I/O): no change
|
||||
|
||||
Single workload:
|
||||
memcached (anon): +[3, 5]%
|
||||
Ops/sec KB/sec
|
||||
patch1-6: 1106168.46 43025.04
|
||||
patch1-7: 1147696.57 44640.29
|
||||
|
||||
Configurations:
|
||||
no change
|
||||
|
||||
Client benchmark results:
|
||||
kswapd profiles:
|
||||
patch1-6
|
||||
39.03% lzo1x_1_do_compress (real work)
|
||||
18.47% page_vma_mapped_walk (overhead)
|
||||
6.74% _raw_spin_unlock_irq
|
||||
3.97% do_raw_spin_lock
|
||||
2.49% ptep_clear_flush
|
||||
2.48% anon_vma_interval_tree_iter_first
|
||||
1.92% folio_referenced_one
|
||||
1.88% __zram_bvec_write
|
||||
1.48% memmove
|
||||
1.31% vma_interval_tree_iter_next
|
||||
|
||||
patch1-7
|
||||
48.16% lzo1x_1_do_compress (real work)
|
||||
8.20% page_vma_mapped_walk (overhead)
|
||||
7.06% _raw_spin_unlock_irq
|
||||
2.92% ptep_clear_flush
|
||||
2.53% __zram_bvec_write
|
||||
2.11% do_raw_spin_lock
|
||||
2.02% memmove
|
||||
1.93% lru_gen_look_around
|
||||
1.56% free_unref_page_list
|
||||
1.40% memset
|
||||
|
||||
Configurations:
|
||||
no change
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Barry Song <baohua@kernel.org>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I4b9ca0fd20f566ce554e703f14cee3fe0048c2fd
|
||||
---
|
||||
include/linux/memcontrol.h | 31 +++++++
|
||||
include/linux/mm.h | 5 +
|
||||
include/linux/mmzone.h | 6 ++
|
||||
mm/internal.h | 1 +
|
||||
mm/memcontrol.c | 1 +
|
||||
mm/rmap.c | 6 ++
|
||||
mm/swap.c | 4 +-
|
||||
mm/vmscan.c | 184 +++++++++++++++++++++++++++++++++++++
|
||||
8 files changed, 236 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/include/linux/memcontrol.h
|
||||
+++ b/include/linux/memcontrol.h
|
||||
@@ -444,6 +444,7 @@ static inline struct obj_cgroup *__folio
|
||||
* - LRU isolation
|
||||
* - lock_page_memcg()
|
||||
* - exclusive reference
|
||||
+ * - mem_cgroup_trylock_pages()
|
||||
*
|
||||
* For a kmem folio a caller should hold an rcu read lock to protect memcg
|
||||
* associated with a kmem folio from being released.
|
||||
@@ -505,6 +506,7 @@ static inline struct mem_cgroup *folio_m
|
||||
* - LRU isolation
|
||||
* - lock_page_memcg()
|
||||
* - exclusive reference
|
||||
+ * - mem_cgroup_trylock_pages()
|
||||
*
|
||||
* For a kmem page a caller should hold an rcu read lock to protect memcg
|
||||
* associated with a kmem page from being released.
|
||||
@@ -959,6 +961,23 @@ void unlock_page_memcg(struct page *page
|
||||
|
||||
void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val);
|
||||
|
||||
+/* try to stabilize folio_memcg() for all the pages in a memcg */
|
||||
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ rcu_read_lock();
|
||||
+
|
||||
+ if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account))
|
||||
+ return true;
|
||||
+
|
||||
+ rcu_read_unlock();
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static inline void mem_cgroup_unlock_pages(void)
|
||||
+{
|
||||
+ rcu_read_unlock();
|
||||
+}
|
||||
+
|
||||
/* idx can be of type enum memcg_stat_item or node_stat_item */
|
||||
static inline void mod_memcg_state(struct mem_cgroup *memcg,
|
||||
int idx, int val)
|
||||
@@ -1433,6 +1452,18 @@ static inline void folio_memcg_unlock(st
|
||||
{
|
||||
}
|
||||
|
||||
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ /* to match folio_memcg_rcu() */
|
||||
+ rcu_read_lock();
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static inline void mem_cgroup_unlock_pages(void)
|
||||
+{
|
||||
+ rcu_read_unlock();
|
||||
+}
|
||||
+
|
||||
static inline void mem_cgroup_handle_over_high(void)
|
||||
{
|
||||
}
|
||||
--- a/include/linux/mm.h
|
||||
+++ b/include/linux/mm.h
|
||||
@@ -1465,6 +1465,11 @@ static inline unsigned long folio_pfn(st
|
||||
return page_to_pfn(&folio->page);
|
||||
}
|
||||
|
||||
+static inline struct folio *pfn_folio(unsigned long pfn)
|
||||
+{
|
||||
+ return page_folio(pfn_to_page(pfn));
|
||||
+}
|
||||
+
|
||||
static inline atomic_t *folio_pincount_ptr(struct folio *folio)
|
||||
{
|
||||
return &folio_page(folio, 1)->compound_pincount;
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -372,6 +372,7 @@ enum lruvec_flags {
|
||||
#ifndef __GENERATING_BOUNDS_H
|
||||
|
||||
struct lruvec;
|
||||
+struct page_vma_mapped_walk;
|
||||
|
||||
#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
|
||||
#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
|
||||
@@ -427,6 +428,7 @@ struct lru_gen_struct {
|
||||
};
|
||||
|
||||
void lru_gen_init_lruvec(struct lruvec *lruvec);
|
||||
+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
void lru_gen_init_memcg(struct mem_cgroup *memcg);
|
||||
@@ -439,6 +441,10 @@ static inline void lru_gen_init_lruvec(s
|
||||
{
|
||||
}
|
||||
|
||||
+static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
#ifdef CONFIG_MEMCG
|
||||
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
|
||||
{
|
||||
--- a/mm/internal.h
|
||||
+++ b/mm/internal.h
|
||||
@@ -83,6 +83,7 @@ vm_fault_t do_swap_page(struct vm_fault
|
||||
void folio_rotate_reclaimable(struct folio *folio);
|
||||
bool __folio_end_writeback(struct folio *folio);
|
||||
void deactivate_file_folio(struct folio *folio);
|
||||
+void folio_activate(struct folio *folio);
|
||||
|
||||
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
|
||||
unsigned long floor, unsigned long ceiling);
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -2789,6 +2789,7 @@ static void commit_charge(struct folio *
|
||||
* - LRU isolation
|
||||
* - lock_page_memcg()
|
||||
* - exclusive reference
|
||||
+ * - mem_cgroup_trylock_pages()
|
||||
*/
|
||||
folio->memcg_data = (unsigned long)memcg;
|
||||
}
|
||||
--- a/mm/rmap.c
|
||||
+++ b/mm/rmap.c
|
||||
@@ -833,6 +833,12 @@ static bool folio_referenced_one(struct
|
||||
}
|
||||
|
||||
if (pvmw.pte) {
|
||||
+ if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
|
||||
+ !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
|
||||
+ lru_gen_look_around(&pvmw);
|
||||
+ referenced++;
|
||||
+ }
|
||||
+
|
||||
if (ptep_clear_flush_young_notify(vma, address,
|
||||
pvmw.pte)) {
|
||||
/*
|
||||
--- a/mm/swap.c
|
||||
+++ b/mm/swap.c
|
||||
@@ -366,7 +366,7 @@ static void folio_activate_drain(int cpu
|
||||
folio_batch_move_lru(fbatch, folio_activate_fn);
|
||||
}
|
||||
|
||||
-static void folio_activate(struct folio *folio)
|
||||
+void folio_activate(struct folio *folio)
|
||||
{
|
||||
if (folio_test_lru(folio) && !folio_test_active(folio) &&
|
||||
!folio_test_unevictable(folio)) {
|
||||
@@ -385,7 +385,7 @@ static inline void folio_activate_drain(
|
||||
{
|
||||
}
|
||||
|
||||
-static void folio_activate(struct folio *folio)
|
||||
+void folio_activate(struct folio *folio)
|
||||
{
|
||||
struct lruvec *lruvec;
|
||||
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -1635,6 +1635,11 @@ retry:
|
||||
if (!sc->may_unmap && folio_mapped(folio))
|
||||
goto keep_locked;
|
||||
|
||||
+ /* folio_update_gen() tried to promote this page? */
|
||||
+ if (lru_gen_enabled() && !ignore_references &&
|
||||
+ folio_mapped(folio) && folio_test_referenced(folio))
|
||||
+ goto keep_locked;
|
||||
+
|
||||
/*
|
||||
* The number of dirty pages determines if a node is marked
|
||||
* reclaim_congested. kswapd will stall and start writing
|
||||
@@ -3219,6 +3224,29 @@ static bool positive_ctrl_err(struct ctr
|
||||
* the aging
|
||||
******************************************************************************/
|
||||
|
||||
+/* promote pages accessed through page tables */
|
||||
+static int folio_update_gen(struct folio *folio, int gen)
|
||||
+{
|
||||
+ unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
|
||||
+ VM_WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
+
|
||||
+ do {
|
||||
+ /* lru_gen_del_folio() has isolated this page? */
|
||||
+ if (!(old_flags & LRU_GEN_MASK)) {
|
||||
+ /* for shrink_page_list() */
|
||||
+ new_flags = old_flags | BIT(PG_referenced);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
|
||||
+ new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
|
||||
+ } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags));
|
||||
+
|
||||
+ return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
||||
+}
|
||||
+
|
||||
/* protect pages accessed multiple times through file descriptors */
|
||||
static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
{
|
||||
@@ -3230,6 +3258,11 @@ static int folio_inc_gen(struct lruvec *
|
||||
VM_WARN_ON_ONCE_FOLIO(!(old_flags & LRU_GEN_MASK), folio);
|
||||
|
||||
do {
|
||||
+ new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
||||
+ /* folio_update_gen() has promoted this page? */
|
||||
+ if (new_gen >= 0 && new_gen != old_gen)
|
||||
+ return new_gen;
|
||||
+
|
||||
new_gen = (old_gen + 1) % MAX_NR_GENS;
|
||||
|
||||
new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
|
||||
@@ -3244,6 +3277,43 @@ static int folio_inc_gen(struct lruvec *
|
||||
return new_gen;
|
||||
}
|
||||
|
||||
+static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
|
||||
+{
|
||||
+ unsigned long pfn = pte_pfn(pte);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end);
|
||||
+
|
||||
+ if (!pte_present(pte) || is_zero_pfn(pfn))
|
||||
+ return -1;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
|
||||
+ return -1;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(!pfn_valid(pfn)))
|
||||
+ return -1;
|
||||
+
|
||||
+ return pfn;
|
||||
+}
|
||||
+
|
||||
+static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
|
||||
+ struct pglist_data *pgdat)
|
||||
+{
|
||||
+ struct folio *folio;
|
||||
+
|
||||
+ /* try to avoid unnecessary memory loads */
|
||||
+ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
|
||||
+ return NULL;
|
||||
+
|
||||
+ folio = pfn_folio(pfn);
|
||||
+ if (folio_nid(folio) != pgdat->node_id)
|
||||
+ return NULL;
|
||||
+
|
||||
+ if (folio_memcg_rcu(folio) != memcg)
|
||||
+ return NULL;
|
||||
+
|
||||
+ return folio;
|
||||
+}
|
||||
+
|
||||
static void inc_min_seq(struct lruvec *lruvec, int type)
|
||||
{
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
@@ -3443,6 +3513,114 @@ static void lru_gen_age_node(struct pgli
|
||||
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * This function exploits spatial locality when shrink_page_list() walks the
|
||||
+ * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages.
|
||||
+ */
|
||||
+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
+{
|
||||
+ int i;
|
||||
+ pte_t *pte;
|
||||
+ unsigned long start;
|
||||
+ unsigned long end;
|
||||
+ unsigned long addr;
|
||||
+ unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
|
||||
+ struct folio *folio = pfn_folio(pvmw->pfn);
|
||||
+ struct mem_cgroup *memcg = folio_memcg(folio);
|
||||
+ struct pglist_data *pgdat = folio_pgdat(folio);
|
||||
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
+ DEFINE_MAX_SEQ(lruvec);
|
||||
+ int old_gen, new_gen = lru_gen_from_seq(max_seq);
|
||||
+
|
||||
+ lockdep_assert_held(pvmw->ptl);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio);
|
||||
+
|
||||
+ if (spin_is_contended(pvmw->ptl))
|
||||
+ return;
|
||||
+
|
||||
+ start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start);
|
||||
+ end = min(pvmw->address | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1;
|
||||
+
|
||||
+ if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
|
||||
+ if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
|
||||
+ end = start + MIN_LRU_BATCH * PAGE_SIZE;
|
||||
+ else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2)
|
||||
+ start = end - MIN_LRU_BATCH * PAGE_SIZE;
|
||||
+ else {
|
||||
+ start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2;
|
||||
+ end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE;
|
||||
+
|
||||
+ rcu_read_lock();
|
||||
+ arch_enter_lazy_mmu_mode();
|
||||
+
|
||||
+ for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
|
||||
+ unsigned long pfn;
|
||||
+
|
||||
+ pfn = get_pte_pfn(pte[i], pvmw->vma, addr);
|
||||
+ if (pfn == -1)
|
||||
+ continue;
|
||||
+
|
||||
+ if (!pte_young(pte[i]))
|
||||
+ continue;
|
||||
+
|
||||
+ folio = get_pfn_folio(pfn, memcg, pgdat);
|
||||
+ if (!folio)
|
||||
+ continue;
|
||||
+
|
||||
+ if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i))
|
||||
+ VM_WARN_ON_ONCE(true);
|
||||
+
|
||||
+ if (pte_dirty(pte[i]) && !folio_test_dirty(folio) &&
|
||||
+ !(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
|
||||
+ !folio_test_swapcache(folio)))
|
||||
+ folio_mark_dirty(folio);
|
||||
+
|
||||
+ old_gen = folio_lru_gen(folio);
|
||||
+ if (old_gen < 0)
|
||||
+ folio_set_referenced(folio);
|
||||
+ else if (old_gen != new_gen)
|
||||
+ __set_bit(i, bitmap);
|
||||
+ }
|
||||
+
|
||||
+ arch_leave_lazy_mmu_mode();
|
||||
+ rcu_read_unlock();
|
||||
+
|
||||
+ if (bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) {
|
||||
+ for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
|
||||
+ folio = pfn_folio(pte_pfn(pte[i]));
|
||||
+ folio_activate(folio);
|
||||
+ }
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* folio_update_gen() requires stable folio_memcg() */
|
||||
+ if (!mem_cgroup_trylock_pages(memcg))
|
||||
+ return;
|
||||
+
|
||||
+ spin_lock_irq(&lruvec->lru_lock);
|
||||
+ new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq);
|
||||
+
|
||||
+ for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
|
||||
+ folio = pfn_folio(pte_pfn(pte[i]));
|
||||
+ if (folio_memcg_rcu(folio) != memcg)
|
||||
+ continue;
|
||||
+
|
||||
+ old_gen = folio_update_gen(folio, new_gen);
|
||||
+ if (old_gen < 0 || old_gen == new_gen)
|
||||
+ continue;
|
||||
+
|
||||
+ lru_gen_update_size(lruvec, folio, old_gen, new_gen);
|
||||
+ }
|
||||
+
|
||||
+ spin_unlock_irq(&lruvec->lru_lock);
|
||||
+
|
||||
+ mem_cgroup_unlock_pages();
|
||||
+}
|
||||
+
|
||||
/******************************************************************************
|
||||
* the eviction
|
||||
******************************************************************************/
|
||||
@@ -3479,6 +3657,12 @@ static bool sort_folio(struct lruvec *lr
|
||||
return true;
|
||||
}
|
||||
|
||||
+ /* promoted */
|
||||
+ if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
|
||||
+ list_move(&folio->lru, &lrugen->lists[gen][type][zone]);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
/* protected */
|
||||
if (tier > tier_idx) {
|
||||
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
|
File diff suppressed because it is too large
Load Diff
@ -1,290 +0,0 @@
|
||||
From 6b9670b94ba2b49b289b997121062500e32fc3e4 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Thu, 27 Jan 2022 19:59:54 -0700
|
||||
Subject: [PATCH 09/14] mm: multi-gen LRU: optimize multiple memcgs
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
When multiple memcgs are available, it is possible to use generations
|
||||
as a frame of reference to make better choices and improve overall
|
||||
performance under global memory pressure. This patch adds a basic
|
||||
optimization to select memcgs that can drop single-use unmapped clean
|
||||
pages first. Doing so reduces the chance of going into the aging path
|
||||
or swapping, which can be costly.
|
||||
|
||||
A typical example that benefits from this optimization is a server
|
||||
running mixed types of workloads, e.g., heavy anon workload in one
|
||||
memcg and heavy buffered I/O workload in the other.
|
||||
|
||||
Though this optimization can be applied to both kswapd and direct
|
||||
reclaim, it is only added to kswapd to keep the patchset manageable.
|
||||
Later improvements may cover the direct reclaim path.
|
||||
|
||||
While ensuring certain fairness to all eligible memcgs, proportional
|
||||
scans of individual memcgs also require proper backoff to avoid
|
||||
overshooting their aggregate reclaim target by too much. Otherwise it
|
||||
can cause high direct reclaim latency. The conditions for backoff are:
|
||||
1. At low priorities, for direct reclaim, if aging fairness or direct
|
||||
reclaim latency is at risk, i.e., aging one memcg multiple times or
|
||||
swapping after the target is met.
|
||||
2. At high priorities, for global reclaim, if per-zone free pages are
|
||||
above respective watermarks.
|
||||
|
||||
Server benchmark results:
|
||||
Mixed workloads:
|
||||
fio (buffered I/O): +[19, 21]%
|
||||
IOPS BW
|
||||
patch1-8: 1880k 7343MiB/s
|
||||
patch1-9: 2252k 8796MiB/s
|
||||
|
||||
memcached (anon): +[119, 123]%
|
||||
Ops/sec KB/sec
|
||||
patch1-8: 862768.65 33514.68
|
||||
patch1-9: 1911022.12 74234.54
|
||||
|
||||
Mixed workloads:
|
||||
fio (buffered I/O): +[75, 77]%
|
||||
IOPS BW
|
||||
5.19-rc1: 1279k 4996MiB/s
|
||||
patch1-9: 2252k 8796MiB/s
|
||||
|
||||
memcached (anon): +[13, 15]%
|
||||
Ops/sec KB/sec
|
||||
5.19-rc1: 1673524.04 65008.87
|
||||
patch1-9: 1911022.12 74234.54
|
||||
|
||||
Configurations:
|
||||
(changes since patch 6)
|
||||
|
||||
cat mixed.sh
|
||||
modprobe brd rd_nr=2 rd_size=56623104
|
||||
|
||||
swapoff -a
|
||||
mkswap /dev/ram0
|
||||
swapon /dev/ram0
|
||||
|
||||
mkfs.ext4 /dev/ram1
|
||||
mount -t ext4 /dev/ram1 /mnt
|
||||
|
||||
memtier_benchmark -S /var/run/memcached/memcached.sock \
|
||||
-P memcache_binary -n allkeys --key-minimum=1 \
|
||||
--key-maximum=50000000 --key-pattern=P:P -c 1 -t 36 \
|
||||
--ratio 1:0 --pipeline 8 -d 2000
|
||||
|
||||
fio -name=mglru --numjobs=36 --directory=/mnt --size=1408m \
|
||||
--buffered=1 --ioengine=io_uring --iodepth=128 \
|
||||
--iodepth_batch_submit=32 --iodepth_batch_complete=32 \
|
||||
--rw=randread --random_distribution=random --norandommap \
|
||||
--time_based --ramp_time=10m --runtime=90m --group_reporting &
|
||||
pid=$!
|
||||
|
||||
sleep 200
|
||||
|
||||
memtier_benchmark -S /var/run/memcached/memcached.sock \
|
||||
-P memcache_binary -n allkeys --key-minimum=1 \
|
||||
--key-maximum=50000000 --key-pattern=R:R -c 1 -t 36 \
|
||||
--ratio 0:1 --pipeline 8 --randomize --distinct-client-seed
|
||||
|
||||
kill -INT $pid
|
||||
wait
|
||||
|
||||
Client benchmark results:
|
||||
no change (CONFIG_MEMCG=n)
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I7e00e0c733437e534ac98031cf8154a681becc00
|
||||
---
|
||||
mm/vmscan.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 95 insertions(+), 9 deletions(-)
|
||||
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -131,6 +131,12 @@ struct scan_control {
|
||||
/* Always discard instead of demoting to lower tier memory */
|
||||
unsigned int no_demotion:1;
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+ /* help kswapd make better choices among multiple memcgs */
|
||||
+ unsigned int memcgs_need_aging:1;
|
||||
+ unsigned long last_reclaimed;
|
||||
+#endif
|
||||
+
|
||||
/* Allocation order */
|
||||
s8 order;
|
||||
|
||||
@@ -4429,6 +4435,19 @@ static void lru_gen_age_node(struct pgli
|
||||
|
||||
VM_WARN_ON_ONCE(!current_is_kswapd());
|
||||
|
||||
+ sc->last_reclaimed = sc->nr_reclaimed;
|
||||
+
|
||||
+ /*
|
||||
+ * To reduce the chance of going into the aging path, which can be
|
||||
+ * costly, optimistically skip it if the flag below was cleared in the
|
||||
+ * eviction path. This improves the overall performance when multiple
|
||||
+ * memcgs are available.
|
||||
+ */
|
||||
+ if (!sc->memcgs_need_aging) {
|
||||
+ sc->memcgs_need_aging = true;
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
set_mm_walk(pgdat);
|
||||
|
||||
memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
||||
@@ -4840,7 +4859,8 @@ static int isolate_folios(struct lruvec
|
||||
return scanned;
|
||||
}
|
||||
|
||||
-static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
|
||||
+static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
|
||||
+ bool *need_swapping)
|
||||
{
|
||||
int type;
|
||||
int scanned;
|
||||
@@ -4903,6 +4923,9 @@ static int evict_folios(struct lruvec *l
|
||||
|
||||
sc->nr_reclaimed += reclaimed;
|
||||
|
||||
+ if (need_swapping && type == LRU_GEN_ANON)
|
||||
+ *need_swapping = true;
|
||||
+
|
||||
return scanned;
|
||||
}
|
||||
|
||||
@@ -4912,9 +4935,8 @@ static int evict_folios(struct lruvec *l
|
||||
* reclaim.
|
||||
*/
|
||||
static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
|
||||
- bool can_swap)
|
||||
+ bool can_swap, bool *need_aging)
|
||||
{
|
||||
- bool need_aging;
|
||||
unsigned long nr_to_scan;
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
DEFINE_MAX_SEQ(lruvec);
|
||||
@@ -4924,8 +4946,8 @@ static unsigned long get_nr_to_scan(stru
|
||||
(mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
|
||||
return 0;
|
||||
|
||||
- need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
|
||||
- if (!need_aging)
|
||||
+ *need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
|
||||
+ if (!*need_aging)
|
||||
return nr_to_scan;
|
||||
|
||||
/* skip the aging path at the default priority */
|
||||
@@ -4942,10 +4964,67 @@ done:
|
||||
return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
|
||||
}
|
||||
|
||||
+static bool should_abort_scan(struct lruvec *lruvec, unsigned long seq,
|
||||
+ struct scan_control *sc, bool need_swapping)
|
||||
+{
|
||||
+ int i;
|
||||
+ DEFINE_MAX_SEQ(lruvec);
|
||||
+
|
||||
+ if (!current_is_kswapd()) {
|
||||
+ /* age each memcg at most once to ensure fairness */
|
||||
+ if (max_seq - seq > 1)
|
||||
+ return true;
|
||||
+
|
||||
+ /* over-swapping can increase allocation latency */
|
||||
+ if (sc->nr_reclaimed >= sc->nr_to_reclaim && need_swapping)
|
||||
+ return true;
|
||||
+
|
||||
+ /* give this thread a chance to exit and free its memory */
|
||||
+ if (fatal_signal_pending(current)) {
|
||||
+ sc->nr_reclaimed += MIN_LRU_BATCH;
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ if (cgroup_reclaim(sc))
|
||||
+ return false;
|
||||
+ } else if (sc->nr_reclaimed - sc->last_reclaimed < sc->nr_to_reclaim)
|
||||
+ return false;
|
||||
+
|
||||
+ /* keep scanning at low priorities to ensure fairness */
|
||||
+ if (sc->priority > DEF_PRIORITY - 2)
|
||||
+ return false;
|
||||
+
|
||||
+ /*
|
||||
+ * A minimum amount of work was done under global memory pressure. For
|
||||
+ * kswapd, it may be overshooting. For direct reclaim, the allocation
|
||||
+ * may succeed if all suitable zones are somewhat safe. In either case,
|
||||
+ * it's better to stop now, and restart later if necessary.
|
||||
+ */
|
||||
+ for (i = 0; i <= sc->reclaim_idx; i++) {
|
||||
+ unsigned long wmark;
|
||||
+ struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i;
|
||||
+
|
||||
+ if (!managed_zone(zone))
|
||||
+ continue;
|
||||
+
|
||||
+ wmark = current_is_kswapd() ? high_wmark_pages(zone) : low_wmark_pages(zone);
|
||||
+ if (wmark > zone_page_state(zone, NR_FREE_PAGES))
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ sc->nr_reclaimed += MIN_LRU_BATCH;
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
{
|
||||
struct blk_plug plug;
|
||||
+ bool need_aging = false;
|
||||
+ bool need_swapping = false;
|
||||
unsigned long scanned = 0;
|
||||
+ unsigned long reclaimed = sc->nr_reclaimed;
|
||||
+ DEFINE_MAX_SEQ(lruvec);
|
||||
|
||||
lru_add_drain();
|
||||
|
||||
@@ -4965,21 +5044,28 @@ static void lru_gen_shrink_lruvec(struct
|
||||
else
|
||||
swappiness = 0;
|
||||
|
||||
- nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
|
||||
+ nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging);
|
||||
if (!nr_to_scan)
|
||||
- break;
|
||||
+ goto done;
|
||||
|
||||
- delta = evict_folios(lruvec, sc, swappiness);
|
||||
+ delta = evict_folios(lruvec, sc, swappiness, &need_swapping);
|
||||
if (!delta)
|
||||
- break;
|
||||
+ goto done;
|
||||
|
||||
scanned += delta;
|
||||
if (scanned >= nr_to_scan)
|
||||
break;
|
||||
|
||||
+ if (should_abort_scan(lruvec, max_seq, sc, need_swapping))
|
||||
+ break;
|
||||
+
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
+ /* see the comment in lru_gen_age_node() */
|
||||
+ if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging)
|
||||
+ sc->memcgs_need_aging = false;
|
||||
+done:
|
||||
clear_mm_walk();
|
||||
|
||||
blk_finish_plug(&plug);
|
@ -1,475 +0,0 @@
|
||||
From ef61bb3622ee0f36e055dfd5006badff08f5ce61 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Thu, 27 Jan 2022 19:52:09 -0700
|
||||
Subject: [PATCH 10/14] mm: multi-gen LRU: kill switch
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add /sys/kernel/mm/lru_gen/enabled as a kill switch. Components that
|
||||
can be disabled include:
|
||||
0x0001: the multi-gen LRU core
|
||||
0x0002: walking page table, when arch_has_hw_pte_young() returns
|
||||
true
|
||||
0x0004: clearing the accessed bit in non-leaf PMD entries, when
|
||||
CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y
|
||||
[yYnN]: apply to all the components above
|
||||
E.g.,
|
||||
echo y >/sys/kernel/mm/lru_gen/enabled
|
||||
cat /sys/kernel/mm/lru_gen/enabled
|
||||
0x0007
|
||||
echo 5 >/sys/kernel/mm/lru_gen/enabled
|
||||
cat /sys/kernel/mm/lru_gen/enabled
|
||||
0x0005
|
||||
|
||||
NB: the page table walks happen on the scale of seconds under heavy
|
||||
memory pressure, in which case the mmap_lock contention is a lesser
|
||||
concern, compared with the LRU lock contention and the I/O congestion.
|
||||
So far the only well-known case of the mmap_lock contention happens on
|
||||
Android, due to Scudo [1] which allocates several thousand VMAs for
|
||||
merely a few hundred MBs. The SPF and the Maple Tree also have
|
||||
provided their own assessments [2][3]. However, if walking page tables
|
||||
does worsen the mmap_lock contention, the kill switch can be used to
|
||||
disable it. In this case the multi-gen LRU will suffer a minor
|
||||
performance degradation, as shown previously.
|
||||
|
||||
Clearing the accessed bit in non-leaf PMD entries can also be
|
||||
disabled, since this behavior was not tested on x86 varieties other
|
||||
than Intel and AMD.
|
||||
|
||||
[1] https://source.android.com/devices/tech/debug/scudo
|
||||
[2] https://lore.kernel.org/r/20220128131006.67712-1-michel@lespinasse.org/
|
||||
[3] https://lore.kernel.org/r/20220426150616.3937571-1-Liam.Howlett@oracle.com/
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I4c909618e8fed7fb1337f6624bbe542ec920a515
|
||||
---
|
||||
include/linux/cgroup.h | 15 ++-
|
||||
include/linux/mm_inline.h | 15 ++-
|
||||
include/linux/mmzone.h | 9 ++
|
||||
kernel/cgroup/cgroup-internal.h | 1 -
|
||||
mm/Kconfig | 6 +
|
||||
mm/vmscan.c | 228 +++++++++++++++++++++++++++++++-
|
||||
6 files changed, 265 insertions(+), 9 deletions(-)
|
||||
|
||||
--- a/include/linux/cgroup.h
|
||||
+++ b/include/linux/cgroup.h
|
||||
@@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgr
|
||||
css_put(&cgrp->self);
|
||||
}
|
||||
|
||||
+extern struct mutex cgroup_mutex;
|
||||
+
|
||||
+static inline void cgroup_lock(void)
|
||||
+{
|
||||
+ mutex_lock(&cgroup_mutex);
|
||||
+}
|
||||
+
|
||||
+static inline void cgroup_unlock(void)
|
||||
+{
|
||||
+ mutex_unlock(&cgroup_mutex);
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* task_css_set_check - obtain a task's css_set with extra access conditions
|
||||
* @task: the task to obtain css_set for
|
||||
@@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgr
|
||||
* as locks used during the cgroup_subsys::attach() methods.
|
||||
*/
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
-extern struct mutex cgroup_mutex;
|
||||
extern spinlock_t css_set_lock;
|
||||
#define task_css_set_check(task, __c) \
|
||||
rcu_dereference_check((task)->cgroups, \
|
||||
@@ -708,6 +719,8 @@ struct cgroup;
|
||||
static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
|
||||
static inline void css_get(struct cgroup_subsys_state *css) {}
|
||||
static inline void css_put(struct cgroup_subsys_state *css) {}
|
||||
+static inline void cgroup_lock(void) {}
|
||||
+static inline void cgroup_unlock(void) {}
|
||||
static inline int cgroup_attach_task_all(struct task_struct *from,
|
||||
struct task_struct *t) { return 0; }
|
||||
static inline int cgroupstats_build(struct cgroupstats *stats,
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -106,10 +106,21 @@ static __always_inline enum lru_list fol
|
||||
|
||||
#ifdef CONFIG_LRU_GEN
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN_ENABLED
|
||||
static inline bool lru_gen_enabled(void)
|
||||
{
|
||||
- return true;
|
||||
+ DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]);
|
||||
+
|
||||
+ return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]);
|
||||
+}
|
||||
+#else
|
||||
+static inline bool lru_gen_enabled(void)
|
||||
+{
|
||||
+ DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]);
|
||||
+
|
||||
+ return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]);
|
||||
}
|
||||
+#endif
|
||||
|
||||
static inline bool lru_gen_in_fault(void)
|
||||
{
|
||||
@@ -222,7 +233,7 @@ static inline bool lru_gen_add_folio(str
|
||||
|
||||
VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
|
||||
|
||||
- if (folio_test_unevictable(folio))
|
||||
+ if (folio_test_unevictable(folio) || !lrugen->enabled)
|
||||
return false;
|
||||
/*
|
||||
* There are three common cases for this page:
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -384,6 +384,13 @@ enum {
|
||||
LRU_GEN_FILE,
|
||||
};
|
||||
|
||||
+enum {
|
||||
+ LRU_GEN_CORE,
|
||||
+ LRU_GEN_MM_WALK,
|
||||
+ LRU_GEN_NONLEAF_YOUNG,
|
||||
+ NR_LRU_GEN_CAPS
|
||||
+};
|
||||
+
|
||||
#define MIN_LRU_BATCH BITS_PER_LONG
|
||||
#define MAX_LRU_BATCH (MIN_LRU_BATCH * 64)
|
||||
|
||||
@@ -425,6 +432,8 @@ struct lru_gen_struct {
|
||||
/* can be modified without holding the LRU lock */
|
||||
atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
|
||||
atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
|
||||
+ /* whether the multi-gen LRU is enabled */
|
||||
+ bool enabled;
|
||||
};
|
||||
|
||||
enum {
|
||||
--- a/kernel/cgroup/cgroup-internal.h
|
||||
+++ b/kernel/cgroup/cgroup-internal.h
|
||||
@@ -164,7 +164,6 @@ struct cgroup_mgctx {
|
||||
#define DEFINE_CGROUP_MGCTX(name) \
|
||||
struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)
|
||||
|
||||
-extern struct mutex cgroup_mutex;
|
||||
extern spinlock_t css_set_lock;
|
||||
extern struct cgroup_subsys *cgroup_subsys[];
|
||||
extern struct list_head cgroup_roots;
|
||||
--- a/mm/Kconfig
|
||||
+++ b/mm/Kconfig
|
||||
@@ -1133,6 +1133,12 @@ config LRU_GEN
|
||||
help
|
||||
A high performance LRU implementation to overcommit memory.
|
||||
|
||||
+config LRU_GEN_ENABLED
|
||||
+ bool "Enable by default"
|
||||
+ depends on LRU_GEN
|
||||
+ help
|
||||
+ This option enables the multi-gen LRU by default.
|
||||
+
|
||||
config LRU_GEN_STATS
|
||||
bool "Full stats for debugging"
|
||||
depends on LRU_GEN
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -51,6 +51,7 @@
|
||||
#include <linux/psi.h>
|
||||
#include <linux/pagewalk.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
+#include <linux/ctype.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/div64.h>
|
||||
@@ -3070,6 +3071,14 @@ static bool can_age_anon_pages(struct pg
|
||||
|
||||
#ifdef CONFIG_LRU_GEN
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN_ENABLED
|
||||
+DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS);
|
||||
+#define get_cap(cap) static_branch_likely(&lru_gen_caps[cap])
|
||||
+#else
|
||||
+DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
|
||||
+#define get_cap(cap) static_branch_unlikely(&lru_gen_caps[cap])
|
||||
+#endif
|
||||
+
|
||||
/******************************************************************************
|
||||
* shorthand helpers
|
||||
******************************************************************************/
|
||||
@@ -3946,7 +3955,8 @@ static void walk_pmd_range_locked(pud_t
|
||||
goto next;
|
||||
|
||||
if (!pmd_trans_huge(pmd[i])) {
|
||||
- if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG))
|
||||
+ if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
|
||||
+ get_cap(LRU_GEN_NONLEAF_YOUNG))
|
||||
pmdp_test_and_clear_young(vma, addr, pmd + i);
|
||||
goto next;
|
||||
}
|
||||
@@ -4044,10 +4054,12 @@ restart:
|
||||
walk->mm_stats[MM_NONLEAF_TOTAL]++;
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
|
||||
- if (!pmd_young(val))
|
||||
- continue;
|
||||
+ if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
|
||||
+ if (!pmd_young(val))
|
||||
+ continue;
|
||||
|
||||
- walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
|
||||
+ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
|
||||
+ }
|
||||
#endif
|
||||
if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
|
||||
continue;
|
||||
@@ -4309,7 +4321,7 @@ static bool try_to_inc_max_seq(struct lr
|
||||
* handful of PTEs. Spreading the work out over a period of time usually
|
||||
* is less efficient, but it avoids bursty page faults.
|
||||
*/
|
||||
- if (!arch_has_hw_pte_young()) {
|
||||
+ if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
|
||||
success = iterate_mm_list_nowalk(lruvec, max_seq);
|
||||
goto done;
|
||||
}
|
||||
@@ -5072,6 +5084,208 @@ done:
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
+ * state change
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
|
||||
+{
|
||||
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+
|
||||
+ if (lrugen->enabled) {
|
||||
+ enum lru_list lru;
|
||||
+
|
||||
+ for_each_evictable_lru(lru) {
|
||||
+ if (!list_empty(&lruvec->lists[lru]))
|
||||
+ return false;
|
||||
+ }
|
||||
+ } else {
|
||||
+ int gen, type, zone;
|
||||
+
|
||||
+ for_each_gen_type_zone(gen, type, zone) {
|
||||
+ if (!list_empty(&lrugen->lists[gen][type][zone]))
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static bool fill_evictable(struct lruvec *lruvec)
|
||||
+{
|
||||
+ enum lru_list lru;
|
||||
+ int remaining = MAX_LRU_BATCH;
|
||||
+
|
||||
+ for_each_evictable_lru(lru) {
|
||||
+ int type = is_file_lru(lru);
|
||||
+ bool active = is_active_lru(lru);
|
||||
+ struct list_head *head = &lruvec->lists[lru];
|
||||
+
|
||||
+ while (!list_empty(head)) {
|
||||
+ bool success;
|
||||
+ struct folio *folio = lru_to_folio(head);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio) != active, folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_lru_gen(folio) != -1, folio);
|
||||
+
|
||||
+ lruvec_del_folio(lruvec, folio);
|
||||
+ success = lru_gen_add_folio(lruvec, folio, false);
|
||||
+ VM_WARN_ON_ONCE(!success);
|
||||
+
|
||||
+ if (!--remaining)
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static bool drain_evictable(struct lruvec *lruvec)
|
||||
+{
|
||||
+ int gen, type, zone;
|
||||
+ int remaining = MAX_LRU_BATCH;
|
||||
+
|
||||
+ for_each_gen_type_zone(gen, type, zone) {
|
||||
+ struct list_head *head = &lruvec->lrugen.lists[gen][type][zone];
|
||||
+
|
||||
+ while (!list_empty(head)) {
|
||||
+ bool success;
|
||||
+ struct folio *folio = lru_to_folio(head);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
|
||||
+
|
||||
+ success = lru_gen_del_folio(lruvec, folio, false);
|
||||
+ VM_WARN_ON_ONCE(!success);
|
||||
+ lruvec_add_folio(lruvec, folio);
|
||||
+
|
||||
+ if (!--remaining)
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_change_state(bool enabled)
|
||||
+{
|
||||
+ static DEFINE_MUTEX(state_mutex);
|
||||
+
|
||||
+ struct mem_cgroup *memcg;
|
||||
+
|
||||
+ cgroup_lock();
|
||||
+ cpus_read_lock();
|
||||
+ get_online_mems();
|
||||
+ mutex_lock(&state_mutex);
|
||||
+
|
||||
+ if (enabled == lru_gen_enabled())
|
||||
+ goto unlock;
|
||||
+
|
||||
+ if (enabled)
|
||||
+ static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
|
||||
+ else
|
||||
+ static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
|
||||
+
|
||||
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
||||
+ do {
|
||||
+ int nid;
|
||||
+
|
||||
+ for_each_node(nid) {
|
||||
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
|
||||
+
|
||||
+ if (!lruvec)
|
||||
+ continue;
|
||||
+
|
||||
+ spin_lock_irq(&lruvec->lru_lock);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
|
||||
+ VM_WARN_ON_ONCE(!state_is_valid(lruvec));
|
||||
+
|
||||
+ lruvec->lrugen.enabled = enabled;
|
||||
+
|
||||
+ while (!(enabled ? fill_evictable(lruvec) : drain_evictable(lruvec))) {
|
||||
+ spin_unlock_irq(&lruvec->lru_lock);
|
||||
+ cond_resched();
|
||||
+ spin_lock_irq(&lruvec->lru_lock);
|
||||
+ }
|
||||
+
|
||||
+ spin_unlock_irq(&lruvec->lru_lock);
|
||||
+ }
|
||||
+
|
||||
+ cond_resched();
|
||||
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
||||
+unlock:
|
||||
+ mutex_unlock(&state_mutex);
|
||||
+ put_online_mems();
|
||||
+ cpus_read_unlock();
|
||||
+ cgroup_unlock();
|
||||
+}
|
||||
+
|
||||
+/******************************************************************************
|
||||
+ * sysfs interface
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
+{
|
||||
+ unsigned int caps = 0;
|
||||
+
|
||||
+ if (get_cap(LRU_GEN_CORE))
|
||||
+ caps |= BIT(LRU_GEN_CORE);
|
||||
+
|
||||
+ if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
|
||||
+ caps |= BIT(LRU_GEN_MM_WALK);
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG))
|
||||
+ caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
|
||||
+
|
||||
+ return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
|
||||
+}
|
||||
+
|
||||
+static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
+ const char *buf, size_t len)
|
||||
+{
|
||||
+ int i;
|
||||
+ unsigned int caps;
|
||||
+
|
||||
+ if (tolower(*buf) == 'n')
|
||||
+ caps = 0;
|
||||
+ else if (tolower(*buf) == 'y')
|
||||
+ caps = -1;
|
||||
+ else if (kstrtouint(buf, 0, &caps))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ for (i = 0; i < NR_LRU_GEN_CAPS; i++) {
|
||||
+ bool enabled = caps & BIT(i);
|
||||
+
|
||||
+ if (i == LRU_GEN_CORE)
|
||||
+ lru_gen_change_state(enabled);
|
||||
+ else if (enabled)
|
||||
+ static_branch_enable(&lru_gen_caps[i]);
|
||||
+ else
|
||||
+ static_branch_disable(&lru_gen_caps[i]);
|
||||
+ }
|
||||
+
|
||||
+ return len;
|
||||
+}
|
||||
+
|
||||
+static struct kobj_attribute lru_gen_enabled_attr = __ATTR(
|
||||
+ enabled, 0644, show_enabled, store_enabled
|
||||
+);
|
||||
+
|
||||
+static struct attribute *lru_gen_attrs[] = {
|
||||
+ &lru_gen_enabled_attr.attr,
|
||||
+ NULL
|
||||
+};
|
||||
+
|
||||
+static struct attribute_group lru_gen_attr_group = {
|
||||
+ .name = "lru_gen",
|
||||
+ .attrs = lru_gen_attrs,
|
||||
+};
|
||||
+
|
||||
+/******************************************************************************
|
||||
* initialization
|
||||
******************************************************************************/
|
||||
|
||||
@@ -5081,6 +5295,7 @@ void lru_gen_init_lruvec(struct lruvec *
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
|
||||
lrugen->max_seq = MIN_NR_GENS + 1;
|
||||
+ lrugen->enabled = lru_gen_enabled();
|
||||
|
||||
for_each_gen_type_zone(gen, type, zone)
|
||||
INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
|
||||
@@ -5120,6 +5335,9 @@ static int __init init_lru_gen(void)
|
||||
BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
|
||||
BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
|
||||
|
||||
+ if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
|
||||
+ pr_err("lru_gen: failed to create sysfs group\n");
|
||||
+
|
||||
return 0;
|
||||
};
|
||||
late_initcall(init_lru_gen);
|
@ -1,202 +0,0 @@
|
||||
From 9d92c76fb8ac09ff195024139575d8c4db66b672 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Thu, 27 Jan 2022 20:08:50 -0700
|
||||
Subject: [PATCH 11/14] mm: multi-gen LRU: thrashing prevention
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add /sys/kernel/mm/lru_gen/min_ttl_ms for thrashing prevention, as
|
||||
requested by many desktop users [1].
|
||||
|
||||
When set to value N, it prevents the working set of N milliseconds
|
||||
from getting evicted. The OOM killer is triggered if this working set
|
||||
cannot be kept in memory. Based on the average human detectable lag
|
||||
(~100ms), N=1000 usually eliminates intolerable lags due to thrashing.
|
||||
Larger values like N=3000 make lags less noticeable at the risk of
|
||||
premature OOM kills.
|
||||
|
||||
Compared with the size-based approach [2], this time-based approach
|
||||
has the following advantages:
|
||||
1. It is easier to configure because it is agnostic to applications
|
||||
and memory sizes.
|
||||
2. It is more reliable because it is directly wired to the OOM killer.
|
||||
|
||||
[1] https://lore.kernel.org/r/Ydza%2FzXKY9ATRoh6@google.com/
|
||||
[2] https://lore.kernel.org/r/20101028191523.GA14972@google.com/
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I007499d7e47374b59fd620e8c3962940bc9f788e
|
||||
---
|
||||
include/linux/mmzone.h | 2 ++
|
||||
mm/vmscan.c | 74 ++++++++++++++++++++++++++++++++++++++++--
|
||||
2 files changed, 73 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -419,6 +419,8 @@ struct lru_gen_struct {
|
||||
unsigned long max_seq;
|
||||
/* the eviction increments the oldest generation numbers */
|
||||
unsigned long min_seq[ANON_AND_FILE];
|
||||
+ /* the birth time of each generation in jiffies */
|
||||
+ unsigned long timestamps[MAX_NR_GENS];
|
||||
/* the multi-gen LRU lists, lazily sorted on eviction */
|
||||
struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
|
||||
/* the multi-gen LRU sizes, eventually consistent */
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -4293,6 +4293,7 @@ static void inc_max_seq(struct lruvec *l
|
||||
for (type = 0; type < ANON_AND_FILE; type++)
|
||||
reset_ctrl_pos(lruvec, type, false);
|
||||
|
||||
+ WRITE_ONCE(lrugen->timestamps[next], jiffies);
|
||||
/* make sure preceding modifications appear */
|
||||
smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
|
||||
|
||||
@@ -4420,7 +4421,7 @@ static bool should_run_aging(struct lruv
|
||||
return false;
|
||||
}
|
||||
|
||||
-static void age_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
+static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl)
|
||||
{
|
||||
bool need_aging;
|
||||
unsigned long nr_to_scan;
|
||||
@@ -4434,16 +4435,36 @@ static void age_lruvec(struct lruvec *lr
|
||||
mem_cgroup_calculate_protection(NULL, memcg);
|
||||
|
||||
if (mem_cgroup_below_min(memcg))
|
||||
- return;
|
||||
+ return false;
|
||||
|
||||
need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
|
||||
+
|
||||
+ if (min_ttl) {
|
||||
+ int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
|
||||
+ unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
|
||||
+
|
||||
+ if (time_is_after_jiffies(birth + min_ttl))
|
||||
+ return false;
|
||||
+
|
||||
+ /* the size is likely too small to be helpful */
|
||||
+ if (!nr_to_scan && sc->priority != DEF_PRIORITY)
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
if (need_aging)
|
||||
try_to_inc_max_seq(lruvec, max_seq, sc, swappiness);
|
||||
+
|
||||
+ return true;
|
||||
}
|
||||
|
||||
+/* to protect the working set of the last N jiffies */
|
||||
+static unsigned long lru_gen_min_ttl __read_mostly;
|
||||
+
|
||||
static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
+ bool success = false;
|
||||
+ unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
|
||||
|
||||
VM_WARN_ON_ONCE(!current_is_kswapd());
|
||||
|
||||
@@ -4466,12 +4487,32 @@ static void lru_gen_age_node(struct pgli
|
||||
do {
|
||||
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
|
||||
- age_lruvec(lruvec, sc);
|
||||
+ if (age_lruvec(lruvec, sc, min_ttl))
|
||||
+ success = true;
|
||||
|
||||
cond_resched();
|
||||
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
||||
|
||||
clear_mm_walk();
|
||||
+
|
||||
+ /* check the order to exclude compaction-induced reclaim */
|
||||
+ if (success || !min_ttl || sc->order)
|
||||
+ return;
|
||||
+
|
||||
+ /*
|
||||
+ * The main goal is to OOM kill if every generation from all memcgs is
|
||||
+ * younger than min_ttl. However, another possibility is all memcgs are
|
||||
+ * either below min or empty.
|
||||
+ */
|
||||
+ if (mutex_trylock(&oom_lock)) {
|
||||
+ struct oom_control oc = {
|
||||
+ .gfp_mask = sc->gfp_mask,
|
||||
+ };
|
||||
+
|
||||
+ out_of_memory(&oc);
|
||||
+
|
||||
+ mutex_unlock(&oom_lock);
|
||||
+ }
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -5228,6 +5269,28 @@ unlock:
|
||||
* sysfs interface
|
||||
******************************************************************************/
|
||||
|
||||
+static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
+{
|
||||
+ return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl)));
|
||||
+}
|
||||
+
|
||||
+static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
+ const char *buf, size_t len)
|
||||
+{
|
||||
+ unsigned int msecs;
|
||||
+
|
||||
+ if (kstrtouint(buf, 0, &msecs))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ WRITE_ONCE(lru_gen_min_ttl, msecs_to_jiffies(msecs));
|
||||
+
|
||||
+ return len;
|
||||
+}
|
||||
+
|
||||
+static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR(
|
||||
+ min_ttl_ms, 0644, show_min_ttl, store_min_ttl
|
||||
+);
|
||||
+
|
||||
static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
unsigned int caps = 0;
|
||||
@@ -5276,6 +5339,7 @@ static struct kobj_attribute lru_gen_ena
|
||||
);
|
||||
|
||||
static struct attribute *lru_gen_attrs[] = {
|
||||
+ &lru_gen_min_ttl_attr.attr,
|
||||
&lru_gen_enabled_attr.attr,
|
||||
NULL
|
||||
};
|
||||
@@ -5291,12 +5355,16 @@ static struct attribute_group lru_gen_at
|
||||
|
||||
void lru_gen_init_lruvec(struct lruvec *lruvec)
|
||||
{
|
||||
+ int i;
|
||||
int gen, type, zone;
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
|
||||
lrugen->max_seq = MIN_NR_GENS + 1;
|
||||
lrugen->enabled = lru_gen_enabled();
|
||||
|
||||
+ for (i = 0; i <= MIN_NR_GENS + 1; i++)
|
||||
+ lrugen->timestamps[i] = jiffies;
|
||||
+
|
||||
for_each_gen_type_zone(gen, type, zone)
|
||||
INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
|
||||
|
@ -1,557 +0,0 @@
|
||||
From d1e0e5fcdea16d4ceead496a0ea2fdbb6bc5bfe4 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Thu, 27 Jan 2022 20:12:41 -0700
|
||||
Subject: [PATCH 12/14] mm: multi-gen LRU: debugfs interface
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add /sys/kernel/debug/lru_gen for working set estimation and proactive
|
||||
reclaim. These techniques are commonly used to optimize job scheduling
|
||||
(bin packing) in data centers [1][2].
|
||||
|
||||
Compared with the page table-based approach and the PFN-based
|
||||
approach, this lruvec-based approach has the following advantages:
|
||||
1. It offers better choices because it is aware of memcgs, NUMA nodes,
|
||||
shared mappings and unmapped page cache.
|
||||
2. It is more scalable because it is O(nr_hot_pages), whereas the
|
||||
PFN-based approach is O(nr_total_pages).
|
||||
|
||||
Add /sys/kernel/debug/lru_gen_full for debugging.
|
||||
|
||||
[1] https://dl.acm.org/doi/10.1145/3297858.3304053
|
||||
[2] https://dl.acm.org/doi/10.1145/3503222.3507731
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Reviewed-by: Qi Zheng <zhengqi.arch@bytedance.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I7bb06f14e0a94901a076cc3767d0855d4f1ea3ab
|
||||
---
|
||||
include/linux/nodemask.h | 1 +
|
||||
mm/vmscan.c | 411 ++++++++++++++++++++++++++++++++++++++-
|
||||
2 files changed, 402 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/include/linux/nodemask.h
|
||||
+++ b/include/linux/nodemask.h
|
||||
@@ -493,6 +493,7 @@ static inline int num_node_state(enum no
|
||||
#define first_online_node 0
|
||||
#define first_memory_node 0
|
||||
#define next_online_node(nid) (MAX_NUMNODES)
|
||||
+#define next_memory_node(nid) (MAX_NUMNODES)
|
||||
#define nr_node_ids 1U
|
||||
#define nr_online_nodes 1U
|
||||
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -52,6 +52,7 @@
|
||||
#include <linux/pagewalk.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/ctype.h>
|
||||
+#include <linux/debugfs.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/div64.h>
|
||||
@@ -4197,12 +4198,40 @@ static void clear_mm_walk(void)
|
||||
kfree(walk);
|
||||
}
|
||||
|
||||
-static void inc_min_seq(struct lruvec *lruvec, int type)
|
||||
+static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
|
||||
{
|
||||
+ int zone;
|
||||
+ int remaining = MAX_LRU_BATCH;
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+ int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
|
||||
+
|
||||
+ if (type == LRU_GEN_ANON && !can_swap)
|
||||
+ goto done;
|
||||
+
|
||||
+ /* prevent cold/hot inversion if force_scan is true */
|
||||
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) {
|
||||
+ struct list_head *head = &lrugen->lists[old_gen][type][zone];
|
||||
+
|
||||
+ while (!list_empty(head)) {
|
||||
+ struct folio *folio = lru_to_folio(head);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
|
||||
|
||||
+ new_gen = folio_inc_gen(lruvec, folio, false);
|
||||
+ list_move_tail(&folio->lru, &lrugen->lists[new_gen][type][zone]);
|
||||
+
|
||||
+ if (!--remaining)
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+done:
|
||||
reset_ctrl_pos(lruvec, type, true);
|
||||
WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1);
|
||||
+
|
||||
+ return true;
|
||||
}
|
||||
|
||||
static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
|
||||
@@ -4248,7 +4277,7 @@ next:
|
||||
return success;
|
||||
}
|
||||
|
||||
-static void inc_max_seq(struct lruvec *lruvec, bool can_swap)
|
||||
+static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
|
||||
{
|
||||
int prev, next;
|
||||
int type, zone;
|
||||
@@ -4262,9 +4291,13 @@ static void inc_max_seq(struct lruvec *l
|
||||
if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
|
||||
continue;
|
||||
|
||||
- VM_WARN_ON_ONCE(type == LRU_GEN_FILE || can_swap);
|
||||
+ VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap));
|
||||
|
||||
- inc_min_seq(lruvec, type);
|
||||
+ while (!inc_min_seq(lruvec, type, can_swap)) {
|
||||
+ spin_unlock_irq(&lruvec->lru_lock);
|
||||
+ cond_resched();
|
||||
+ spin_lock_irq(&lruvec->lru_lock);
|
||||
+ }
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -4301,7 +4334,7 @@ static void inc_max_seq(struct lruvec *l
|
||||
}
|
||||
|
||||
static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
|
||||
- struct scan_control *sc, bool can_swap)
|
||||
+ struct scan_control *sc, bool can_swap, bool force_scan)
|
||||
{
|
||||
bool success;
|
||||
struct lru_gen_mm_walk *walk;
|
||||
@@ -4322,7 +4355,7 @@ static bool try_to_inc_max_seq(struct lr
|
||||
* handful of PTEs. Spreading the work out over a period of time usually
|
||||
* is less efficient, but it avoids bursty page faults.
|
||||
*/
|
||||
- if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
|
||||
+ if (!force_scan && !(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
|
||||
success = iterate_mm_list_nowalk(lruvec, max_seq);
|
||||
goto done;
|
||||
}
|
||||
@@ -4336,7 +4369,7 @@ static bool try_to_inc_max_seq(struct lr
|
||||
walk->lruvec = lruvec;
|
||||
walk->max_seq = max_seq;
|
||||
walk->can_swap = can_swap;
|
||||
- walk->force_scan = false;
|
||||
+ walk->force_scan = force_scan;
|
||||
|
||||
do {
|
||||
success = iterate_mm_list(lruvec, walk, &mm);
|
||||
@@ -4356,7 +4389,7 @@ done:
|
||||
|
||||
VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
|
||||
|
||||
- inc_max_seq(lruvec, can_swap);
|
||||
+ inc_max_seq(lruvec, can_swap, force_scan);
|
||||
/* either this sees any waiters or they will see updated max_seq */
|
||||
if (wq_has_sleeper(&lruvec->mm_state.wait))
|
||||
wake_up_all(&lruvec->mm_state.wait);
|
||||
@@ -4452,7 +4485,7 @@ static bool age_lruvec(struct lruvec *lr
|
||||
}
|
||||
|
||||
if (need_aging)
|
||||
- try_to_inc_max_seq(lruvec, max_seq, sc, swappiness);
|
||||
+ try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -5011,7 +5044,7 @@ static unsigned long get_nr_to_scan(stru
|
||||
if (current_is_kswapd())
|
||||
return 0;
|
||||
|
||||
- if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap))
|
||||
+ if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
|
||||
return nr_to_scan;
|
||||
done:
|
||||
return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
|
||||
@@ -5350,6 +5383,361 @@ static struct attribute_group lru_gen_at
|
||||
};
|
||||
|
||||
/******************************************************************************
|
||||
+ * debugfs interface
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos)
|
||||
+{
|
||||
+ struct mem_cgroup *memcg;
|
||||
+ loff_t nr_to_skip = *pos;
|
||||
+
|
||||
+ m->private = kvmalloc(PATH_MAX, GFP_KERNEL);
|
||||
+ if (!m->private)
|
||||
+ return ERR_PTR(-ENOMEM);
|
||||
+
|
||||
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
||||
+ do {
|
||||
+ int nid;
|
||||
+
|
||||
+ for_each_node_state(nid, N_MEMORY) {
|
||||
+ if (!nr_to_skip--)
|
||||
+ return get_lruvec(memcg, nid);
|
||||
+ }
|
||||
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_seq_stop(struct seq_file *m, void *v)
|
||||
+{
|
||||
+ if (!IS_ERR_OR_NULL(v))
|
||||
+ mem_cgroup_iter_break(NULL, lruvec_memcg(v));
|
||||
+
|
||||
+ kvfree(m->private);
|
||||
+ m->private = NULL;
|
||||
+}
|
||||
+
|
||||
+static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
+{
|
||||
+ int nid = lruvec_pgdat(v)->node_id;
|
||||
+ struct mem_cgroup *memcg = lruvec_memcg(v);
|
||||
+
|
||||
+ ++*pos;
|
||||
+
|
||||
+ nid = next_memory_node(nid);
|
||||
+ if (nid == MAX_NUMNODES) {
|
||||
+ memcg = mem_cgroup_iter(NULL, memcg, NULL);
|
||||
+ if (!memcg)
|
||||
+ return NULL;
|
||||
+
|
||||
+ nid = first_memory_node;
|
||||
+ }
|
||||
+
|
||||
+ return get_lruvec(memcg, nid);
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
|
||||
+ unsigned long max_seq, unsigned long *min_seq,
|
||||
+ unsigned long seq)
|
||||
+{
|
||||
+ int i;
|
||||
+ int type, tier;
|
||||
+ int hist = lru_hist_from_seq(seq);
|
||||
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+
|
||||
+ for (tier = 0; tier < MAX_NR_TIERS; tier++) {
|
||||
+ seq_printf(m, " %10d", tier);
|
||||
+ for (type = 0; type < ANON_AND_FILE; type++) {
|
||||
+ const char *s = " ";
|
||||
+ unsigned long n[3] = {};
|
||||
+
|
||||
+ if (seq == max_seq) {
|
||||
+ s = "RT ";
|
||||
+ n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]);
|
||||
+ n[1] = READ_ONCE(lrugen->avg_total[type][tier]);
|
||||
+ } else if (seq == min_seq[type] || NR_HIST_GENS > 1) {
|
||||
+ s = "rep";
|
||||
+ n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]);
|
||||
+ n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]);
|
||||
+ if (tier)
|
||||
+ n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]);
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < 3; i++)
|
||||
+ seq_printf(m, " %10lu%c", n[i], s[i]);
|
||||
+ }
|
||||
+ seq_putc(m, '\n');
|
||||
+ }
|
||||
+
|
||||
+ seq_puts(m, " ");
|
||||
+ for (i = 0; i < NR_MM_STATS; i++) {
|
||||
+ const char *s = " ";
|
||||
+ unsigned long n = 0;
|
||||
+
|
||||
+ if (seq == max_seq && NR_HIST_GENS == 1) {
|
||||
+ s = "LOYNFA";
|
||||
+ n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
|
||||
+ } else if (seq != max_seq && NR_HIST_GENS > 1) {
|
||||
+ s = "loynfa";
|
||||
+ n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
|
||||
+ }
|
||||
+
|
||||
+ seq_printf(m, " %10lu%c", n, s[i]);
|
||||
+ }
|
||||
+ seq_putc(m, '\n');
|
||||
+}
|
||||
+
|
||||
+static int lru_gen_seq_show(struct seq_file *m, void *v)
|
||||
+{
|
||||
+ unsigned long seq;
|
||||
+ bool full = !debugfs_real_fops(m->file)->write;
|
||||
+ struct lruvec *lruvec = v;
|
||||
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+ int nid = lruvec_pgdat(lruvec)->node_id;
|
||||
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
+ DEFINE_MAX_SEQ(lruvec);
|
||||
+ DEFINE_MIN_SEQ(lruvec);
|
||||
+
|
||||
+ if (nid == first_memory_node) {
|
||||
+ const char *path = memcg ? m->private : "";
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ if (memcg)
|
||||
+ cgroup_path(memcg->css.cgroup, m->private, PATH_MAX);
|
||||
+#endif
|
||||
+ seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path);
|
||||
+ }
|
||||
+
|
||||
+ seq_printf(m, " node %5d\n", nid);
|
||||
+
|
||||
+ if (!full)
|
||||
+ seq = min_seq[LRU_GEN_ANON];
|
||||
+ else if (max_seq >= MAX_NR_GENS)
|
||||
+ seq = max_seq - MAX_NR_GENS + 1;
|
||||
+ else
|
||||
+ seq = 0;
|
||||
+
|
||||
+ for (; seq <= max_seq; seq++) {
|
||||
+ int type, zone;
|
||||
+ int gen = lru_gen_from_seq(seq);
|
||||
+ unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
|
||||
+
|
||||
+ seq_printf(m, " %10lu %10u", seq, jiffies_to_msecs(jiffies - birth));
|
||||
+
|
||||
+ for (type = 0; type < ANON_AND_FILE; type++) {
|
||||
+ unsigned long size = 0;
|
||||
+ char mark = full && seq < min_seq[type] ? 'x' : ' ';
|
||||
+
|
||||
+ for (zone = 0; zone < MAX_NR_ZONES; zone++)
|
||||
+ size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
|
||||
+
|
||||
+ seq_printf(m, " %10lu%c", size, mark);
|
||||
+ }
|
||||
+
|
||||
+ seq_putc(m, '\n');
|
||||
+
|
||||
+ if (full)
|
||||
+ lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static const struct seq_operations lru_gen_seq_ops = {
|
||||
+ .start = lru_gen_seq_start,
|
||||
+ .stop = lru_gen_seq_stop,
|
||||
+ .next = lru_gen_seq_next,
|
||||
+ .show = lru_gen_seq_show,
|
||||
+};
|
||||
+
|
||||
+static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
|
||||
+ bool can_swap, bool force_scan)
|
||||
+{
|
||||
+ DEFINE_MAX_SEQ(lruvec);
|
||||
+ DEFINE_MIN_SEQ(lruvec);
|
||||
+
|
||||
+ if (seq < max_seq)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (seq > max_seq)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq)
|
||||
+ return -ERANGE;
|
||||
+
|
||||
+ try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, force_scan);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
|
||||
+ int swappiness, unsigned long nr_to_reclaim)
|
||||
+{
|
||||
+ DEFINE_MAX_SEQ(lruvec);
|
||||
+
|
||||
+ if (seq + MIN_NR_GENS > max_seq)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ sc->nr_reclaimed = 0;
|
||||
+
|
||||
+ while (!signal_pending(current)) {
|
||||
+ DEFINE_MIN_SEQ(lruvec);
|
||||
+
|
||||
+ if (seq < min_seq[!swappiness])
|
||||
+ return 0;
|
||||
+
|
||||
+ if (sc->nr_reclaimed >= nr_to_reclaim)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (!evict_folios(lruvec, sc, swappiness, NULL))
|
||||
+ return 0;
|
||||
+
|
||||
+ cond_resched();
|
||||
+ }
|
||||
+
|
||||
+ return -EINTR;
|
||||
+}
|
||||
+
|
||||
+static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq,
|
||||
+ struct scan_control *sc, int swappiness, unsigned long opt)
|
||||
+{
|
||||
+ struct lruvec *lruvec;
|
||||
+ int err = -EINVAL;
|
||||
+ struct mem_cgroup *memcg = NULL;
|
||||
+
|
||||
+ if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (!mem_cgroup_disabled()) {
|
||||
+ rcu_read_lock();
|
||||
+ memcg = mem_cgroup_from_id(memcg_id);
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ if (memcg && !css_tryget(&memcg->css))
|
||||
+ memcg = NULL;
|
||||
+#endif
|
||||
+ rcu_read_unlock();
|
||||
+
|
||||
+ if (!memcg)
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ if (memcg_id != mem_cgroup_id(memcg))
|
||||
+ goto done;
|
||||
+
|
||||
+ lruvec = get_lruvec(memcg, nid);
|
||||
+
|
||||
+ if (swappiness < 0)
|
||||
+ swappiness = get_swappiness(lruvec, sc);
|
||||
+ else if (swappiness > 200)
|
||||
+ goto done;
|
||||
+
|
||||
+ switch (cmd) {
|
||||
+ case '+':
|
||||
+ err = run_aging(lruvec, seq, sc, swappiness, opt);
|
||||
+ break;
|
||||
+ case '-':
|
||||
+ err = run_eviction(lruvec, seq, sc, swappiness, opt);
|
||||
+ break;
|
||||
+ }
|
||||
+done:
|
||||
+ mem_cgroup_put(memcg);
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
|
||||
+ size_t len, loff_t *pos)
|
||||
+{
|
||||
+ void *buf;
|
||||
+ char *cur, *next;
|
||||
+ unsigned int flags;
|
||||
+ struct blk_plug plug;
|
||||
+ int err = -EINVAL;
|
||||
+ struct scan_control sc = {
|
||||
+ .may_writepage = true,
|
||||
+ .may_unmap = true,
|
||||
+ .may_swap = true,
|
||||
+ .reclaim_idx = MAX_NR_ZONES - 1,
|
||||
+ .gfp_mask = GFP_KERNEL,
|
||||
+ };
|
||||
+
|
||||
+ buf = kvmalloc(len + 1, GFP_KERNEL);
|
||||
+ if (!buf)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ if (copy_from_user(buf, src, len)) {
|
||||
+ kvfree(buf);
|
||||
+ return -EFAULT;
|
||||
+ }
|
||||
+
|
||||
+ set_task_reclaim_state(current, &sc.reclaim_state);
|
||||
+ flags = memalloc_noreclaim_save();
|
||||
+ blk_start_plug(&plug);
|
||||
+ if (!set_mm_walk(NULL)) {
|
||||
+ err = -ENOMEM;
|
||||
+ goto done;
|
||||
+ }
|
||||
+
|
||||
+ next = buf;
|
||||
+ next[len] = '\0';
|
||||
+
|
||||
+ while ((cur = strsep(&next, ",;\n"))) {
|
||||
+ int n;
|
||||
+ int end;
|
||||
+ char cmd;
|
||||
+ unsigned int memcg_id;
|
||||
+ unsigned int nid;
|
||||
+ unsigned long seq;
|
||||
+ unsigned int swappiness = -1;
|
||||
+ unsigned long opt = -1;
|
||||
+
|
||||
+ cur = skip_spaces(cur);
|
||||
+ if (!*cur)
|
||||
+ continue;
|
||||
+
|
||||
+ n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid,
|
||||
+ &seq, &end, &swappiness, &end, &opt, &end);
|
||||
+ if (n < 4 || cur[end]) {
|
||||
+ err = -EINVAL;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ err = run_cmd(cmd, memcg_id, nid, seq, &sc, swappiness, opt);
|
||||
+ if (err)
|
||||
+ break;
|
||||
+ }
|
||||
+done:
|
||||
+ clear_mm_walk();
|
||||
+ blk_finish_plug(&plug);
|
||||
+ memalloc_noreclaim_restore(flags);
|
||||
+ set_task_reclaim_state(current, NULL);
|
||||
+
|
||||
+ kvfree(buf);
|
||||
+
|
||||
+ return err ? : len;
|
||||
+}
|
||||
+
|
||||
+static int lru_gen_seq_open(struct inode *inode, struct file *file)
|
||||
+{
|
||||
+ return seq_open(file, &lru_gen_seq_ops);
|
||||
+}
|
||||
+
|
||||
+static const struct file_operations lru_gen_rw_fops = {
|
||||
+ .open = lru_gen_seq_open,
|
||||
+ .read = seq_read,
|
||||
+ .write = lru_gen_seq_write,
|
||||
+ .llseek = seq_lseek,
|
||||
+ .release = seq_release,
|
||||
+};
|
||||
+
|
||||
+static const struct file_operations lru_gen_ro_fops = {
|
||||
+ .open = lru_gen_seq_open,
|
||||
+ .read = seq_read,
|
||||
+ .llseek = seq_lseek,
|
||||
+ .release = seq_release,
|
||||
+};
|
||||
+
|
||||
+/******************************************************************************
|
||||
* initialization
|
||||
******************************************************************************/
|
||||
|
||||
@@ -5406,6 +5794,9 @@ static int __init init_lru_gen(void)
|
||||
if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
|
||||
pr_err("lru_gen: failed to create sysfs group\n");
|
||||
|
||||
+ debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops);
|
||||
+ debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops);
|
||||
+
|
||||
return 0;
|
||||
};
|
||||
late_initcall(init_lru_gen);
|
@ -1,253 +0,0 @@
|
||||
From 22199c9b30ffcc332be643577709a2af960e6786 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Sun, 23 Jan 2022 16:44:43 -0700
|
||||
Subject: [PATCH 13/14] mm: multi-gen LRU: admin guide
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add an admin guide.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I1902178bcbb5adfa0a748c4d284a6456059bdd7e
|
||||
---
|
||||
Documentation/admin-guide/mm/index.rst | 1 +
|
||||
Documentation/admin-guide/mm/multigen_lru.rst | 162 ++++++++++++++++++
|
||||
mm/Kconfig | 3 +-
|
||||
mm/vmscan.c | 4 +
|
||||
4 files changed, 169 insertions(+), 1 deletion(-)
|
||||
create mode 100644 Documentation/admin-guide/mm/multigen_lru.rst
|
||||
|
||||
--- a/Documentation/admin-guide/mm/index.rst
|
||||
+++ b/Documentation/admin-guide/mm/index.rst
|
||||
@@ -32,6 +32,7 @@ the Linux memory management.
|
||||
idle_page_tracking
|
||||
ksm
|
||||
memory-hotplug
|
||||
+ multigen_lru
|
||||
nommu-mmap
|
||||
numa_memory_policy
|
||||
numaperf
|
||||
--- /dev/null
|
||||
+++ b/Documentation/admin-guide/mm/multigen_lru.rst
|
||||
@@ -0,0 +1,162 @@
|
||||
+.. SPDX-License-Identifier: GPL-2.0
|
||||
+
|
||||
+=============
|
||||
+Multi-Gen LRU
|
||||
+=============
|
||||
+The multi-gen LRU is an alternative LRU implementation that optimizes
|
||||
+page reclaim and improves performance under memory pressure. Page
|
||||
+reclaim decides the kernel's caching policy and ability to overcommit
|
||||
+memory. It directly impacts the kswapd CPU usage and RAM efficiency.
|
||||
+
|
||||
+Quick start
|
||||
+===========
|
||||
+Build the kernel with the following configurations.
|
||||
+
|
||||
+* ``CONFIG_LRU_GEN=y``
|
||||
+* ``CONFIG_LRU_GEN_ENABLED=y``
|
||||
+
|
||||
+All set!
|
||||
+
|
||||
+Runtime options
|
||||
+===============
|
||||
+``/sys/kernel/mm/lru_gen/`` contains stable ABIs described in the
|
||||
+following subsections.
|
||||
+
|
||||
+Kill switch
|
||||
+-----------
|
||||
+``enabled`` accepts different values to enable or disable the
|
||||
+following components. Its default value depends on
|
||||
+``CONFIG_LRU_GEN_ENABLED``. All the components should be enabled
|
||||
+unless some of them have unforeseen side effects. Writing to
|
||||
+``enabled`` has no effect when a component is not supported by the
|
||||
+hardware, and valid values will be accepted even when the main switch
|
||||
+is off.
|
||||
+
|
||||
+====== ===============================================================
|
||||
+Values Components
|
||||
+====== ===============================================================
|
||||
+0x0001 The main switch for the multi-gen LRU.
|
||||
+0x0002 Clearing the accessed bit in leaf page table entries in large
|
||||
+ batches, when MMU sets it (e.g., on x86). This behavior can
|
||||
+ theoretically worsen lock contention (mmap_lock). If it is
|
||||
+ disabled, the multi-gen LRU will suffer a minor performance
|
||||
+ degradation for workloads that contiguously map hot pages,
|
||||
+ whose accessed bits can be otherwise cleared by fewer larger
|
||||
+ batches.
|
||||
+0x0004 Clearing the accessed bit in non-leaf page table entries as
|
||||
+ well, when MMU sets it (e.g., on x86). This behavior was not
|
||||
+ verified on x86 varieties other than Intel and AMD. If it is
|
||||
+ disabled, the multi-gen LRU will suffer a negligible
|
||||
+ performance degradation.
|
||||
+[yYnN] Apply to all the components above.
|
||||
+====== ===============================================================
|
||||
+
|
||||
+E.g.,
|
||||
+::
|
||||
+
|
||||
+ echo y >/sys/kernel/mm/lru_gen/enabled
|
||||
+ cat /sys/kernel/mm/lru_gen/enabled
|
||||
+ 0x0007
|
||||
+ echo 5 >/sys/kernel/mm/lru_gen/enabled
|
||||
+ cat /sys/kernel/mm/lru_gen/enabled
|
||||
+ 0x0005
|
||||
+
|
||||
+Thrashing prevention
|
||||
+--------------------
|
||||
+Personal computers are more sensitive to thrashing because it can
|
||||
+cause janks (lags when rendering UI) and negatively impact user
|
||||
+experience. The multi-gen LRU offers thrashing prevention to the
|
||||
+majority of laptop and desktop users who do not have ``oomd``.
|
||||
+
|
||||
+Users can write ``N`` to ``min_ttl_ms`` to prevent the working set of
|
||||
+``N`` milliseconds from getting evicted. The OOM killer is triggered
|
||||
+if this working set cannot be kept in memory. In other words, this
|
||||
+option works as an adjustable pressure relief valve, and when open, it
|
||||
+terminates applications that are hopefully not being used.
|
||||
+
|
||||
+Based on the average human detectable lag (~100ms), ``N=1000`` usually
|
||||
+eliminates intolerable janks due to thrashing. Larger values like
|
||||
+``N=3000`` make janks less noticeable at the risk of premature OOM
|
||||
+kills.
|
||||
+
|
||||
+The default value ``0`` means disabled.
|
||||
+
|
||||
+Experimental features
|
||||
+=====================
|
||||
+``/sys/kernel/debug/lru_gen`` accepts commands described in the
|
||||
+following subsections. Multiple command lines are supported, as is
|
||||
+concatenation with delimiters ``,`` and ``;``.
|
||||
+
|
||||
+``/sys/kernel/debug/lru_gen_full`` provides additional stats for
|
||||
+debugging. ``CONFIG_LRU_GEN_STATS=y`` keeps historical stats from
|
||||
+evicted generations in this file.
|
||||
+
|
||||
+Working set estimation
|
||||
+----------------------
|
||||
+Working set estimation measures how much memory an application needs
|
||||
+in a given time interval, and it is usually done with little impact on
|
||||
+the performance of the application. E.g., data centers want to
|
||||
+optimize job scheduling (bin packing) to improve memory utilization.
|
||||
+When a new job comes in, the job scheduler needs to find out whether
|
||||
+each server it manages can allocate a certain amount of memory for
|
||||
+this new job before it can pick a candidate. To do so, the job
|
||||
+scheduler needs to estimate the working sets of the existing jobs.
|
||||
+
|
||||
+When it is read, ``lru_gen`` returns a histogram of numbers of pages
|
||||
+accessed over different time intervals for each memcg and node.
|
||||
+``MAX_NR_GENS`` decides the number of bins for each histogram. The
|
||||
+histograms are noncumulative.
|
||||
+::
|
||||
+
|
||||
+ memcg memcg_id memcg_path
|
||||
+ node node_id
|
||||
+ min_gen_nr age_in_ms nr_anon_pages nr_file_pages
|
||||
+ ...
|
||||
+ max_gen_nr age_in_ms nr_anon_pages nr_file_pages
|
||||
+
|
||||
+Each bin contains an estimated number of pages that have been accessed
|
||||
+within ``age_in_ms``. E.g., ``min_gen_nr`` contains the coldest pages
|
||||
+and ``max_gen_nr`` contains the hottest pages, since ``age_in_ms`` of
|
||||
+the former is the largest and that of the latter is the smallest.
|
||||
+
|
||||
+Users can write the following command to ``lru_gen`` to create a new
|
||||
+generation ``max_gen_nr+1``:
|
||||
+
|
||||
+ ``+ memcg_id node_id max_gen_nr [can_swap [force_scan]]``
|
||||
+
|
||||
+``can_swap`` defaults to the swap setting and, if it is set to ``1``,
|
||||
+it forces the scan of anon pages when swap is off, and vice versa.
|
||||
+``force_scan`` defaults to ``1`` and, if it is set to ``0``, it
|
||||
+employs heuristics to reduce the overhead, which is likely to reduce
|
||||
+the coverage as well.
|
||||
+
|
||||
+A typical use case is that a job scheduler runs this command at a
|
||||
+certain time interval to create new generations, and it ranks the
|
||||
+servers it manages based on the sizes of their cold pages defined by
|
||||
+this time interval.
|
||||
+
|
||||
+Proactive reclaim
|
||||
+-----------------
|
||||
+Proactive reclaim induces page reclaim when there is no memory
|
||||
+pressure. It usually targets cold pages only. E.g., when a new job
|
||||
+comes in, the job scheduler wants to proactively reclaim cold pages on
|
||||
+the server it selected, to improve the chance of successfully landing
|
||||
+this new job.
|
||||
+
|
||||
+Users can write the following command to ``lru_gen`` to evict
|
||||
+generations less than or equal to ``min_gen_nr``.
|
||||
+
|
||||
+ ``- memcg_id node_id min_gen_nr [swappiness [nr_to_reclaim]]``
|
||||
+
|
||||
+``min_gen_nr`` should be less than ``max_gen_nr-1``, since
|
||||
+``max_gen_nr`` and ``max_gen_nr-1`` are not fully aged (equivalent to
|
||||
+the active list) and therefore cannot be evicted. ``swappiness``
|
||||
+overrides the default value in ``/proc/sys/vm/swappiness``.
|
||||
+``nr_to_reclaim`` limits the number of pages to evict.
|
||||
+
|
||||
+A typical use case is that a job scheduler runs this command before it
|
||||
+tries to land a new job on a server. If it fails to materialize enough
|
||||
+cold pages because of the overestimation, it retries on the next
|
||||
+server according to the ranking result obtained from the working set
|
||||
+estimation step. This less forceful approach limits the impacts on the
|
||||
+existing jobs.
|
||||
--- a/mm/Kconfig
|
||||
+++ b/mm/Kconfig
|
||||
@@ -1131,7 +1131,8 @@ config LRU_GEN
|
||||
# make sure folio->flags has enough spare bits
|
||||
depends on 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP
|
||||
help
|
||||
- A high performance LRU implementation to overcommit memory.
|
||||
+ A high performance LRU implementation to overcommit memory. See
|
||||
+ Documentation/admin-guide/mm/multigen_lru.rst for details.
|
||||
|
||||
config LRU_GEN_ENABLED
|
||||
bool "Enable by default"
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -5307,6 +5307,7 @@ static ssize_t show_min_ttl(struct kobje
|
||||
return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl)));
|
||||
}
|
||||
|
||||
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
|
||||
static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
@@ -5340,6 +5341,7 @@ static ssize_t show_enabled(struct kobje
|
||||
return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
|
||||
}
|
||||
|
||||
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
|
||||
static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
@@ -5487,6 +5489,7 @@ static void lru_gen_seq_show_full(struct
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
|
||||
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
|
||||
static int lru_gen_seq_show(struct seq_file *m, void *v)
|
||||
{
|
||||
unsigned long seq;
|
||||
@@ -5645,6 +5648,7 @@ done:
|
||||
return err;
|
||||
}
|
||||
|
||||
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
|
||||
static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
|
||||
size_t len, loff_t *pos)
|
||||
{
|
@ -1,202 +0,0 @@
|
||||
From bd82a74f6b5c0a75ef61be5e9be34319bb17328f Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Sun, 6 Mar 2022 20:35:00 -0700
|
||||
Subject: [PATCH 14/14] mm: multi-gen LRU: design doc
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add a design doc.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I958afcabf5abc37b3e58f72638d35a349c31b98d
|
||||
---
|
||||
Documentation/mm/index.rst | 1 +
|
||||
Documentation/mm/multigen_lru.rst | 159 ++++++++++++++++++++++++++++++
|
||||
2 files changed, 160 insertions(+)
|
||||
create mode 100644 Documentation/mm/multigen_lru.rst
|
||||
|
||||
--- a/Documentation/mm/index.rst
|
||||
+++ b/Documentation/mm/index.rst
|
||||
@@ -51,6 +51,7 @@ above structured documentation, or delet
|
||||
ksm
|
||||
memory-model
|
||||
mmu_notifier
|
||||
+ multigen_lru
|
||||
numa
|
||||
overcommit-accounting
|
||||
page_migration
|
||||
--- /dev/null
|
||||
+++ b/Documentation/mm/multigen_lru.rst
|
||||
@@ -0,0 +1,159 @@
|
||||
+.. SPDX-License-Identifier: GPL-2.0
|
||||
+
|
||||
+=============
|
||||
+Multi-Gen LRU
|
||||
+=============
|
||||
+The multi-gen LRU is an alternative LRU implementation that optimizes
|
||||
+page reclaim and improves performance under memory pressure. Page
|
||||
+reclaim decides the kernel's caching policy and ability to overcommit
|
||||
+memory. It directly impacts the kswapd CPU usage and RAM efficiency.
|
||||
+
|
||||
+Design overview
|
||||
+===============
|
||||
+Objectives
|
||||
+----------
|
||||
+The design objectives are:
|
||||
+
|
||||
+* Good representation of access recency
|
||||
+* Try to profit from spatial locality
|
||||
+* Fast paths to make obvious choices
|
||||
+* Simple self-correcting heuristics
|
||||
+
|
||||
+The representation of access recency is at the core of all LRU
|
||||
+implementations. In the multi-gen LRU, each generation represents a
|
||||
+group of pages with similar access recency. Generations establish a
|
||||
+(time-based) common frame of reference and therefore help make better
|
||||
+choices, e.g., between different memcgs on a computer or different
|
||||
+computers in a data center (for job scheduling).
|
||||
+
|
||||
+Exploiting spatial locality improves efficiency when gathering the
|
||||
+accessed bit. A rmap walk targets a single page and does not try to
|
||||
+profit from discovering a young PTE. A page table walk can sweep all
|
||||
+the young PTEs in an address space, but the address space can be too
|
||||
+sparse to make a profit. The key is to optimize both methods and use
|
||||
+them in combination.
|
||||
+
|
||||
+Fast paths reduce code complexity and runtime overhead. Unmapped pages
|
||||
+do not require TLB flushes; clean pages do not require writeback.
|
||||
+These facts are only helpful when other conditions, e.g., access
|
||||
+recency, are similar. With generations as a common frame of reference,
|
||||
+additional factors stand out. But obvious choices might not be good
|
||||
+choices; thus self-correction is necessary.
|
||||
+
|
||||
+The benefits of simple self-correcting heuristics are self-evident.
|
||||
+Again, with generations as a common frame of reference, this becomes
|
||||
+attainable. Specifically, pages in the same generation can be
|
||||
+categorized based on additional factors, and a feedback loop can
|
||||
+statistically compare the refault percentages across those categories
|
||||
+and infer which of them are better choices.
|
||||
+
|
||||
+Assumptions
|
||||
+-----------
|
||||
+The protection of hot pages and the selection of cold pages are based
|
||||
+on page access channels and patterns. There are two access channels:
|
||||
+
|
||||
+* Accesses through page tables
|
||||
+* Accesses through file descriptors
|
||||
+
|
||||
+The protection of the former channel is by design stronger because:
|
||||
+
|
||||
+1. The uncertainty in determining the access patterns of the former
|
||||
+ channel is higher due to the approximation of the accessed bit.
|
||||
+2. The cost of evicting the former channel is higher due to the TLB
|
||||
+ flushes required and the likelihood of encountering the dirty bit.
|
||||
+3. The penalty of underprotecting the former channel is higher because
|
||||
+ applications usually do not prepare themselves for major page
|
||||
+ faults like they do for blocked I/O. E.g., GUI applications
|
||||
+ commonly use dedicated I/O threads to avoid blocking rendering
|
||||
+ threads.
|
||||
+
|
||||
+There are also two access patterns:
|
||||
+
|
||||
+* Accesses exhibiting temporal locality
|
||||
+* Accesses not exhibiting temporal locality
|
||||
+
|
||||
+For the reasons listed above, the former channel is assumed to follow
|
||||
+the former pattern unless ``VM_SEQ_READ`` or ``VM_RAND_READ`` is
|
||||
+present, and the latter channel is assumed to follow the latter
|
||||
+pattern unless outlying refaults have been observed.
|
||||
+
|
||||
+Workflow overview
|
||||
+=================
|
||||
+Evictable pages are divided into multiple generations for each
|
||||
+``lruvec``. The youngest generation number is stored in
|
||||
+``lrugen->max_seq`` for both anon and file types as they are aged on
|
||||
+an equal footing. The oldest generation numbers are stored in
|
||||
+``lrugen->min_seq[]`` separately for anon and file types as clean file
|
||||
+pages can be evicted regardless of swap constraints. These three
|
||||
+variables are monotonically increasing.
|
||||
+
|
||||
+Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)``
|
||||
+bits in order to fit into the gen counter in ``folio->flags``. Each
|
||||
+truncated generation number is an index to ``lrugen->lists[]``. The
|
||||
+sliding window technique is used to track at least ``MIN_NR_GENS`` and
|
||||
+at most ``MAX_NR_GENS`` generations. The gen counter stores a value
|
||||
+within ``[1, MAX_NR_GENS]`` while a page is on one of
|
||||
+``lrugen->lists[]``; otherwise it stores zero.
|
||||
+
|
||||
+Each generation is divided into multiple tiers. A page accessed ``N``
|
||||
+times through file descriptors is in tier ``order_base_2(N)``. Unlike
|
||||
+generations, tiers do not have dedicated ``lrugen->lists[]``. In
|
||||
+contrast to moving across generations, which requires the LRU lock,
|
||||
+moving across tiers only involves atomic operations on
|
||||
+``folio->flags`` and therefore has a negligible cost. A feedback loop
|
||||
+modeled after the PID controller monitors refaults over all the tiers
|
||||
+from anon and file types and decides which tiers from which types to
|
||||
+evict or protect.
|
||||
+
|
||||
+There are two conceptually independent procedures: the aging and the
|
||||
+eviction. They form a closed-loop system, i.e., the page reclaim.
|
||||
+
|
||||
+Aging
|
||||
+-----
|
||||
+The aging produces young generations. Given an ``lruvec``, it
|
||||
+increments ``max_seq`` when ``max_seq-min_seq+1`` approaches
|
||||
+``MIN_NR_GENS``. The aging promotes hot pages to the youngest
|
||||
+generation when it finds them accessed through page tables; the
|
||||
+demotion of cold pages happens consequently when it increments
|
||||
+``max_seq``. The aging uses page table walks and rmap walks to find
|
||||
+young PTEs. For the former, it iterates ``lruvec_memcg()->mm_list``
|
||||
+and calls ``walk_page_range()`` with each ``mm_struct`` on this list
|
||||
+to scan PTEs, and after each iteration, it increments ``max_seq``. For
|
||||
+the latter, when the eviction walks the rmap and finds a young PTE,
|
||||
+the aging scans the adjacent PTEs. For both, on finding a young PTE,
|
||||
+the aging clears the accessed bit and updates the gen counter of the
|
||||
+page mapped by this PTE to ``(max_seq%MAX_NR_GENS)+1``.
|
||||
+
|
||||
+Eviction
|
||||
+--------
|
||||
+The eviction consumes old generations. Given an ``lruvec``, it
|
||||
+increments ``min_seq`` when ``lrugen->lists[]`` indexed by
|
||||
+``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to
|
||||
+evict from, it first compares ``min_seq[]`` to select the older type.
|
||||
+If both types are equally old, it selects the one whose first tier has
|
||||
+a lower refault percentage. The first tier contains single-use
|
||||
+unmapped clean pages, which are the best bet. The eviction sorts a
|
||||
+page according to its gen counter if the aging has found this page
|
||||
+accessed through page tables and updated its gen counter. It also
|
||||
+moves a page to the next generation, i.e., ``min_seq+1``, if this page
|
||||
+was accessed multiple times through file descriptors and the feedback
|
||||
+loop has detected outlying refaults from the tier this page is in. To
|
||||
+this end, the feedback loop uses the first tier as the baseline, for
|
||||
+the reason stated earlier.
|
||||
+
|
||||
+Summary
|
||||
+-------
|
||||
+The multi-gen LRU can be disassembled into the following parts:
|
||||
+
|
||||
+* Generations
|
||||
+* Rmap walks
|
||||
+* Page table walks
|
||||
+* Bloom filters
|
||||
+* PID controller
|
||||
+
|
||||
+The aging and the eviction form a producer-consumer model;
|
||||
+specifically, the latter drives the former by the sliding window over
|
||||
+generations. Within the aging, rmap walks drive page table walks by
|
||||
+inserting hot densely populated page tables to the Bloom filters.
|
||||
+Within the eviction, the PID controller uses refaults as the feedback
|
||||
+to select types to evict and tiers to protect.
|
@ -0,0 +1,59 @@
|
||||
From e9aef3d90b4bd11fccbde3741f2396ea05a9f386 Mon Sep 17 00:00:00 2001
|
||||
From: Heiner Kallweit <hkallweit1@gmail.com>
|
||||
Date: Wed, 30 Nov 2022 23:28:26 +0100
|
||||
Subject: [PATCH] net: add netdev_sw_irq_coalesce_default_on()
|
||||
|
||||
Add a helper for drivers wanting to set SW IRQ coalescing
|
||||
by default. The related sysfs attributes can be used to
|
||||
override the default values.
|
||||
|
||||
Follow Jakub's suggestion and put this functionality into
|
||||
net core so that drivers wanting to use software interrupt
|
||||
coalescing per default don't have to open-code it.
|
||||
|
||||
Note that this function needs to be called before the
|
||||
netdevice is registered.
|
||||
|
||||
Suggested-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
include/linux/netdevice.h | 1 +
|
||||
net/core/dev.c | 16 ++++++++++++++++
|
||||
2 files changed, 17 insertions(+)
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -78,6 +78,7 @@ struct xdp_buff;
|
||||
void synchronize_net(void);
|
||||
void netdev_set_default_ethtool_ops(struct net_device *dev,
|
||||
const struct ethtool_ops *ops);
|
||||
+void netdev_sw_irq_coalesce_default_on(struct net_device *dev);
|
||||
|
||||
/* Backlog congestion levels */
|
||||
#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -10535,6 +10535,22 @@ void netdev_set_default_ethtool_ops(stru
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
|
||||
|
||||
+/**
|
||||
+ * netdev_sw_irq_coalesce_default_on() - enable SW IRQ coalescing by default
|
||||
+ * @dev: netdev to enable the IRQ coalescing on
|
||||
+ *
|
||||
+ * Sets a conservative default for SW IRQ coalescing. Users can use
|
||||
+ * sysfs attributes to override the default values.
|
||||
+ */
|
||||
+void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
|
||||
+{
|
||||
+ WARN_ON(dev->reg_state == NETREG_REGISTERED);
|
||||
+
|
||||
+ dev->gro_flush_timeout = 20000;
|
||||
+ dev->napi_defer_hard_irqs = 1;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
|
||||
+
|
||||
void netdev_freemem(struct net_device *dev)
|
||||
{
|
||||
char *addr = (char *)dev - dev->padded;
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,56 @@
|
||||
From fd4f7a449938ffd21bf2f5a1708d811cc5f3daa5 Mon Sep 17 00:00:00 2001
|
||||
From: Denis Kirjanov <dkirjanov@suse.de>
|
||||
Date: Thu, 27 Oct 2022 21:45:02 +0300
|
||||
Subject: [PATCH 2/4] drivers: net: convert to boolean for the mac_managed_pm
|
||||
flag
|
||||
|
||||
Signed-off-by: Dennis Kirjanov <dkirjanov@suse.de>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/freescale/fec_main.c | 2 +-
|
||||
drivers/net/ethernet/realtek/r8169_main.c | 2 +-
|
||||
drivers/net/usb/asix_devices.c | 4 ++--
|
||||
3 files changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/freescale/fec_main.c
|
||||
+++ b/drivers/net/ethernet/freescale/fec_main.c
|
||||
@@ -2226,7 +2226,7 @@ static int fec_enet_mii_probe(struct net
|
||||
fep->link = 0;
|
||||
fep->full_duplex = 0;
|
||||
|
||||
- phy_dev->mac_managed_pm = 1;
|
||||
+ phy_dev->mac_managed_pm = true;
|
||||
|
||||
phy_attached_info(phy_dev);
|
||||
|
||||
--- a/drivers/net/ethernet/realtek/r8169_main.c
|
||||
+++ b/drivers/net/ethernet/realtek/r8169_main.c
|
||||
@@ -5018,7 +5018,7 @@ static int r8169_mdio_register(struct rt
|
||||
return -EUNATCH;
|
||||
}
|
||||
|
||||
- tp->phydev->mac_managed_pm = 1;
|
||||
+ tp->phydev->mac_managed_pm = true;
|
||||
|
||||
phy_support_asym_pause(tp->phydev);
|
||||
|
||||
--- a/drivers/net/usb/asix_devices.c
|
||||
+++ b/drivers/net/usb/asix_devices.c
|
||||
@@ -700,7 +700,7 @@ static int ax88772_init_phy(struct usbne
|
||||
}
|
||||
|
||||
phy_suspend(priv->phydev);
|
||||
- priv->phydev->mac_managed_pm = 1;
|
||||
+ priv->phydev->mac_managed_pm = true;
|
||||
|
||||
phy_attached_info(priv->phydev);
|
||||
|
||||
@@ -720,7 +720,7 @@ static int ax88772_init_phy(struct usbne
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
- priv->phydev_int->mac_managed_pm = 1;
|
||||
+ priv->phydev_int->mac_managed_pm = true;
|
||||
phy_suspend(priv->phydev_int);
|
||||
|
||||
return 0;
|
@ -0,0 +1,38 @@
|
||||
From fd149c4ab09b01136c7e80db020eed59a3385d24 Mon Sep 17 00:00:00 2001
|
||||
From: Juhee Kang <claudiajkang@gmail.com>
|
||||
Date: Wed, 30 Nov 2022 01:12:44 +0900
|
||||
Subject: [PATCH 3/4] r8169: use tp_to_dev instead of open code
|
||||
|
||||
r8169_main.c already has a helper function, tp_to_dev(), that wraps the
|
||||
open-coded expression &tp->pci_dev->dev. The helper function was added
|
||||
in commit 1e1205b7d3e9 ("r8169: add helper tp_to_dev"). And then later,
|
||||
commit f1e911d5d0df ("r8169: add basic phylib support") added
|
||||
r8169_phylink_handler function but it didn't use the helper function.
|
||||
Thus, tp_to_dev() replaces the open code. This patch doesn't change logic.
|
||||
|
||||
Signed-off-by: Juhee Kang <claudiajkang@gmail.com>
|
||||
Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>
|
||||
Link: https://lore.kernel.org/r/20221129161244.5356-1-claudiajkang@gmail.com
|
||||
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
||||
---
|
||||
drivers/net/ethernet/realtek/r8169_main.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/realtek/r8169_main.c
|
||||
+++ b/drivers/net/ethernet/realtek/r8169_main.c
|
||||
@@ -4559,12 +4559,13 @@ static int rtl8169_poll(struct napi_stru
|
||||
static void r8169_phylink_handler(struct net_device *ndev)
|
||||
{
|
||||
struct rtl8169_private *tp = netdev_priv(ndev);
|
||||
+ struct device *d = tp_to_dev(tp);
|
||||
|
||||
if (netif_carrier_ok(ndev)) {
|
||||
rtl_link_chg_patch(tp);
|
||||
- pm_request_resume(&tp->pci_dev->dev);
|
||||
+ pm_request_resume(d);
|
||||
} else {
|
||||
- pm_runtime_idle(&tp->pci_dev->dev);
|
||||
+ pm_runtime_idle(d);
|
||||
}
|
||||
|
||||
phy_print_status(tp->phydev);
|
@ -0,0 +1,33 @@
|
||||
From 74ec605a11b7ecf68036c3f086f684bbe7381353 Mon Sep 17 00:00:00 2001
|
||||
From: Heiner Kallweit <hkallweit1@gmail.com>
|
||||
Date: Wed, 30 Nov 2022 23:30:15 +0100
|
||||
Subject: [PATCH 4/4] r8169: enable GRO software interrupt coalescing per
|
||||
default
|
||||
|
||||
There are reports about r8169 not reaching full line speed on certain
|
||||
systems (e.g. SBCs) with a 2.5Gbps link.
|
||||
There was a time when hardware interrupt coalescing was enabled per
|
||||
default, but this was changed due to ASPM-related issues on a few systems.
|
||||
So let's use software interrupt coalescing instead and enable it
|
||||
using new function netdev_sw_irq_coalesce_default_on().
|
||||
|
||||
Even with these conservative settings interrupt load on my 1Gbps test
|
||||
system reduced significantly.
|
||||
|
||||
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/realtek/r8169_main.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
--- a/drivers/net/ethernet/realtek/r8169_main.c
|
||||
+++ b/drivers/net/ethernet/realtek/r8169_main.c
|
||||
@@ -5283,6 +5283,8 @@ static int rtl_init_one(struct pci_dev *
|
||||
dev->hw_features |= NETIF_F_RXALL;
|
||||
dev->hw_features |= NETIF_F_RXFCS;
|
||||
|
||||
+ netdev_sw_irq_coalesce_default_on(dev);
|
||||
+
|
||||
/* configure chip for default features */
|
||||
rtl8169_set_features(dev, dev->features);
|
||||
|
@ -0,0 +1,65 @@
|
||||
From 63db0cb35e1cb3b3c134906d1062f65513fdda2d Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Tue, 4 Oct 2022 10:37:09 +0200
|
||||
Subject: [PATCH] mtd: core: simplify (a bit) code find partition-matching
|
||||
dynamic OF node
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
1. Don't hardcode "partition-" string twice
|
||||
2. Use simpler logic & use ->name to avoid of_property_read_string()
|
||||
3. Use mtd_get_of_node() helper
|
||||
|
||||
Cc: Christian Marangi <ansuelsmth@gmail.com>
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20221004083710.27704-1-zajec5@gmail.com
|
||||
---
|
||||
drivers/mtd/mtdcore.c | 16 +++++++---------
|
||||
1 file changed, 7 insertions(+), 9 deletions(-)
|
||||
|
||||
--- a/drivers/mtd/mtdcore.c
|
||||
+++ b/drivers/mtd/mtdcore.c
|
||||
@@ -551,18 +551,16 @@ static void mtd_check_of_node(struct mtd
|
||||
struct device_node *partitions, *parent_dn, *mtd_dn = NULL;
|
||||
const char *pname, *prefix = "partition-";
|
||||
int plen, mtd_name_len, offset, prefix_len;
|
||||
- struct mtd_info *parent;
|
||||
bool found = false;
|
||||
|
||||
/* Check if MTD already has a device node */
|
||||
- if (dev_of_node(&mtd->dev))
|
||||
+ if (mtd_get_of_node(mtd))
|
||||
return;
|
||||
|
||||
/* Check if a partitions node exist */
|
||||
if (!mtd_is_partition(mtd))
|
||||
return;
|
||||
- parent = mtd->parent;
|
||||
- parent_dn = of_node_get(dev_of_node(&parent->dev));
|
||||
+ parent_dn = of_node_get(mtd_get_of_node(mtd->parent));
|
||||
if (!parent_dn)
|
||||
return;
|
||||
|
||||
@@ -575,15 +573,15 @@ static void mtd_check_of_node(struct mtd
|
||||
|
||||
/* Search if a partition is defined with the same name */
|
||||
for_each_child_of_node(partitions, mtd_dn) {
|
||||
- offset = 0;
|
||||
-
|
||||
/* Skip partition with no/wrong prefix */
|
||||
- if (!of_node_name_prefix(mtd_dn, "partition-"))
|
||||
+ if (!of_node_name_prefix(mtd_dn, prefix))
|
||||
continue;
|
||||
|
||||
/* Label have priority. Check that first */
|
||||
- if (of_property_read_string(mtd_dn, "label", &pname)) {
|
||||
- of_property_read_string(mtd_dn, "name", &pname);
|
||||
+ if (!of_property_read_string(mtd_dn, "label", &pname)) {
|
||||
+ offset = 0;
|
||||
+ } else {
|
||||
+ pname = mtd_dn->name;
|
||||
offset = prefix_len;
|
||||
}
|
||||
|
@ -0,0 +1,84 @@
|
||||
From ddb8cefb7af288950447ca6eeeafb09977dab56f Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Tue, 4 Oct 2022 10:37:10 +0200
|
||||
Subject: [PATCH] mtd: core: try to find OF node for every MTD partition
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
So far this feature was limited to the top-level "nvmem-cells" node.
|
||||
There are multiple parsers creating partitions and subpartitions
|
||||
dynamically. Extend that code to handle them too.
|
||||
|
||||
This allows finding partition-* node for every MTD (sub)partition.
|
||||
|
||||
Random example:
|
||||
|
||||
partitions {
|
||||
compatible = "brcm,bcm947xx-cfe-partitions";
|
||||
|
||||
partition-firmware {
|
||||
compatible = "brcm,trx";
|
||||
|
||||
partition-loader {
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
Cc: Christian Marangi <ansuelsmth@gmail.com>
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20221004083710.27704-2-zajec5@gmail.com
|
||||
---
|
||||
drivers/mtd/mtdcore.c | 18 ++++++------------
|
||||
1 file changed, 6 insertions(+), 12 deletions(-)
|
||||
|
||||
--- a/drivers/mtd/mtdcore.c
|
||||
+++ b/drivers/mtd/mtdcore.c
|
||||
@@ -551,20 +551,22 @@ static void mtd_check_of_node(struct mtd
|
||||
struct device_node *partitions, *parent_dn, *mtd_dn = NULL;
|
||||
const char *pname, *prefix = "partition-";
|
||||
int plen, mtd_name_len, offset, prefix_len;
|
||||
- bool found = false;
|
||||
|
||||
/* Check if MTD already has a device node */
|
||||
if (mtd_get_of_node(mtd))
|
||||
return;
|
||||
|
||||
- /* Check if a partitions node exist */
|
||||
if (!mtd_is_partition(mtd))
|
||||
return;
|
||||
+
|
||||
parent_dn = of_node_get(mtd_get_of_node(mtd->parent));
|
||||
if (!parent_dn)
|
||||
return;
|
||||
|
||||
- partitions = of_get_child_by_name(parent_dn, "partitions");
|
||||
+ if (mtd_is_partition(mtd->parent))
|
||||
+ partitions = of_node_get(parent_dn);
|
||||
+ else
|
||||
+ partitions = of_get_child_by_name(parent_dn, "partitions");
|
||||
if (!partitions)
|
||||
goto exit_parent;
|
||||
|
||||
@@ -588,19 +590,11 @@ static void mtd_check_of_node(struct mtd
|
||||
plen = strlen(pname) - offset;
|
||||
if (plen == mtd_name_len &&
|
||||
!strncmp(mtd->name, pname + offset, plen)) {
|
||||
- found = true;
|
||||
+ mtd_set_of_node(mtd, mtd_dn);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
- if (!found)
|
||||
- goto exit_partitions;
|
||||
-
|
||||
- /* Set of_node only for nvmem */
|
||||
- if (of_device_is_compatible(mtd_dn, "nvmem-cells"))
|
||||
- mtd_set_of_node(mtd, mtd_dn);
|
||||
-
|
||||
-exit_partitions:
|
||||
of_node_put(partitions);
|
||||
exit_parent:
|
||||
of_node_put(parent_dn);
|
@ -0,0 +1,229 @@
|
||||
From aec4d5f5ffd0f0092bd9dc21ea90e0bc237d4b74 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Sat, 15 Oct 2022 11:29:50 +0200
|
||||
Subject: [PATCH] mtd: parsers: add TP-Link SafeLoader partitions table parser
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This parser deals with most TP-Link home routers. It reads info about
|
||||
partitions and registers them in the MTD subsystem.
|
||||
|
||||
Example from TP-Link Archer C5 V2:
|
||||
|
||||
spi-nor spi0.0: s25fl128s1 (16384 Kbytes)
|
||||
15 tplink-safeloader partitions found on MTD device spi0.0
|
||||
Creating 15 MTD partitions on "spi0.0":
|
||||
0x000000000000-0x000000040000 : "fs-uboot"
|
||||
0x000000040000-0x000000440000 : "os-image"
|
||||
0x000000440000-0x000000e40000 : "rootfs"
|
||||
0x000000e40000-0x000000e40200 : "default-mac"
|
||||
0x000000e40200-0x000000e40400 : "pin"
|
||||
0x000000e40400-0x000000e40600 : "product-info"
|
||||
0x000000e50000-0x000000e60000 : "partition-table"
|
||||
0x000000e60000-0x000000e60200 : "soft-version"
|
||||
0x000000e61000-0x000000e70000 : "support-list"
|
||||
0x000000e70000-0x000000e80000 : "profile"
|
||||
0x000000e80000-0x000000e90000 : "default-config"
|
||||
0x000000e90000-0x000000ee0000 : "user-config"
|
||||
0x000000ee0000-0x000000fe0000 : "log"
|
||||
0x000000fe0000-0x000000ff0000 : "radio_bk"
|
||||
0x000000ff0000-0x000001000000 : "radio"
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20221015092950.27467-2-zajec5@gmail.com
|
||||
---
|
||||
drivers/mtd/parsers/Kconfig | 15 +++
|
||||
drivers/mtd/parsers/Makefile | 1 +
|
||||
drivers/mtd/parsers/tplink_safeloader.c | 150 ++++++++++++++++++++++++
|
||||
3 files changed, 166 insertions(+)
|
||||
create mode 100644 drivers/mtd/parsers/tplink_safeloader.c
|
||||
|
||||
--- a/drivers/mtd/parsers/Kconfig
|
||||
+++ b/drivers/mtd/parsers/Kconfig
|
||||
@@ -123,6 +123,21 @@ config MTD_AFS_PARTS
|
||||
for your particular device. It won't happen automatically. The
|
||||
'physmap' map driver (CONFIG_MTD_PHYSMAP) does this, for example.
|
||||
|
||||
+config MTD_PARSER_TPLINK_SAFELOADER
|
||||
+ tristate "TP-Link Safeloader partitions parser"
|
||||
+ depends on MTD && (ARCH_BCM_5301X || ATH79 || SOC_MT7620 || SOC_MT7621 || COMPILE_TEST)
|
||||
+ help
|
||||
+ TP-Link home routers use flash partitions to store various data. Info
|
||||
+ about flash space layout is stored in a partitions table using a
|
||||
+ custom ASCII-based format.
|
||||
+
|
||||
+ That format was first found in devices with SafeLoader bootloader and
|
||||
+ was named after it. Later it was adapted to CFE and U-Boot
|
||||
+ bootloaders.
|
||||
+
|
||||
+ This driver reads partitions table, parses it and creates MTD
|
||||
+ partitions.
|
||||
+
|
||||
config MTD_PARSER_TRX
|
||||
tristate "Parser for TRX format partitions"
|
||||
depends on MTD && (BCM47XX || ARCH_BCM_5301X || ARCH_MEDIATEK || RALINK || COMPILE_TEST)
|
||||
--- a/drivers/mtd/parsers/Makefile
|
||||
+++ b/drivers/mtd/parsers/Makefile
|
||||
@@ -10,6 +10,7 @@ ofpart-$(CONFIG_MTD_OF_PARTS_BCM4908) +=
|
||||
ofpart-$(CONFIG_MTD_OF_PARTS_LINKSYS_NS)+= ofpart_linksys_ns.o
|
||||
obj-$(CONFIG_MTD_PARSER_IMAGETAG) += parser_imagetag.o
|
||||
obj-$(CONFIG_MTD_AFS_PARTS) += afs.o
|
||||
+obj-$(CONFIG_MTD_PARSER_TPLINK_SAFELOADER) += tplink_safeloader.o
|
||||
obj-$(CONFIG_MTD_PARSER_TRX) += parser_trx.o
|
||||
obj-$(CONFIG_MTD_SERCOMM_PARTS) += scpart.o
|
||||
obj-$(CONFIG_MTD_SHARPSL_PARTS) += sharpslpart.o
|
||||
--- /dev/null
|
||||
+++ b/drivers/mtd/parsers/tplink_safeloader.c
|
||||
@@ -0,0 +1,150 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+/*
|
||||
+ * Copyright © 2022 Rafał Miłecki <rafal@milecki.pl>
|
||||
+ */
|
||||
+
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/mtd/mtd.h>
|
||||
+#include <linux/mtd/partitions.h>
|
||||
+#include <linux/of.h>
|
||||
+#include <linux/slab.h>
|
||||
+
|
||||
+#define TPLINK_SAFELOADER_DATA_OFFSET 4
|
||||
+#define TPLINK_SAFELOADER_MAX_PARTS 32
|
||||
+
|
||||
+struct safeloader_cmn_header {
|
||||
+ __be32 size;
|
||||
+ uint32_t unused;
|
||||
+} __packed;
|
||||
+
|
||||
+static void *mtd_parser_tplink_safeloader_read_table(struct mtd_info *mtd)
|
||||
+{
|
||||
+ struct safeloader_cmn_header hdr;
|
||||
+ struct device_node *np;
|
||||
+ size_t bytes_read;
|
||||
+ size_t offset;
|
||||
+ size_t size;
|
||||
+ char *buf;
|
||||
+ int err;
|
||||
+
|
||||
+ np = mtd_get_of_node(mtd);
|
||||
+ if (mtd_is_partition(mtd))
|
||||
+ of_node_get(np);
|
||||
+ else
|
||||
+ np = of_get_child_by_name(np, "partitions");
|
||||
+
|
||||
+ if (of_property_read_u32(np, "partitions-table-offset", (u32 *)&offset)) {
|
||||
+ pr_err("Failed to get partitions table offset\n");
|
||||
+ goto err_put;
|
||||
+ }
|
||||
+
|
||||
+ err = mtd_read(mtd, offset, sizeof(hdr), &bytes_read, (uint8_t *)&hdr);
|
||||
+ if (err && !mtd_is_bitflip(err)) {
|
||||
+ pr_err("Failed to read from %s at 0x%zx\n", mtd->name, offset);
|
||||
+ goto err_put;
|
||||
+ }
|
||||
+
|
||||
+ size = be32_to_cpu(hdr.size);
|
||||
+
|
||||
+ buf = kmalloc(size + 1, GFP_KERNEL);
|
||||
+ if (!buf)
|
||||
+ goto err_put;
|
||||
+
|
||||
+ err = mtd_read(mtd, offset + sizeof(hdr), size, &bytes_read, buf);
|
||||
+ if (err && !mtd_is_bitflip(err)) {
|
||||
+ pr_err("Failed to read from %s at 0x%zx\n", mtd->name, offset + sizeof(hdr));
|
||||
+ goto err_kfree;
|
||||
+ }
|
||||
+
|
||||
+ buf[size] = '\0';
|
||||
+
|
||||
+ of_node_put(np);
|
||||
+
|
||||
+ return buf;
|
||||
+
|
||||
+err_kfree:
|
||||
+ kfree(buf);
|
||||
+err_put:
|
||||
+ of_node_put(np);
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+static int mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd,
|
||||
+ const struct mtd_partition **pparts,
|
||||
+ struct mtd_part_parser_data *data)
|
||||
+{
|
||||
+ struct mtd_partition *parts;
|
||||
+ char name[65];
|
||||
+ size_t offset;
|
||||
+ size_t bytes;
|
||||
+ char *buf;
|
||||
+ int idx;
|
||||
+ int err;
|
||||
+
|
||||
+ parts = kcalloc(TPLINK_SAFELOADER_MAX_PARTS, sizeof(*parts), GFP_KERNEL);
|
||||
+ if (!parts) {
|
||||
+ err = -ENOMEM;
|
||||
+ goto err_out;
|
||||
+ }
|
||||
+
|
||||
+ buf = mtd_parser_tplink_safeloader_read_table(mtd);
|
||||
+ if (!buf) {
|
||||
+ err = -ENOENT;
|
||||
+ goto err_out;
|
||||
+ }
|
||||
+
|
||||
+ for (idx = 0, offset = TPLINK_SAFELOADER_DATA_OFFSET;
|
||||
+ idx < TPLINK_SAFELOADER_MAX_PARTS &&
|
||||
+ sscanf(buf + offset, "partition %64s base 0x%llx size 0x%llx%zn\n",
|
||||
+ name, &parts[idx].offset, &parts[idx].size, &bytes) == 3;
|
||||
+ idx++, offset += bytes + 1) {
|
||||
+ parts[idx].name = kstrdup(name, GFP_KERNEL);
|
||||
+ if (!parts[idx].name) {
|
||||
+ err = -ENOMEM;
|
||||
+ goto err_free;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (idx == TPLINK_SAFELOADER_MAX_PARTS)
|
||||
+ pr_warn("Reached maximum number of partitions!\n");
|
||||
+
|
||||
+ kfree(buf);
|
||||
+
|
||||
+ *pparts = parts;
|
||||
+
|
||||
+ return idx;
|
||||
+
|
||||
+err_free:
|
||||
+ for (idx -= 1; idx >= 0; idx--)
|
||||
+ kfree(parts[idx].name);
|
||||
+err_out:
|
||||
+ return err;
|
||||
+};
|
||||
+
|
||||
+static void mtd_parser_tplink_safeloader_cleanup(const struct mtd_partition *pparts,
|
||||
+ int nr_parts)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < nr_parts; i++)
|
||||
+ kfree(pparts[i].name);
|
||||
+
|
||||
+ kfree(pparts);
|
||||
+}
|
||||
+
|
||||
+static const struct of_device_id mtd_parser_tplink_safeloader_of_match_table[] = {
|
||||
+ { .compatible = "tplink,safeloader-partitions" },
|
||||
+ {},
|
||||
+};
|
||||
+MODULE_DEVICE_TABLE(of, mtd_parser_tplink_safeloader_of_match_table);
|
||||
+
|
||||
+static struct mtd_part_parser mtd_parser_tplink_safeloader = {
|
||||
+ .parse_fn = mtd_parser_tplink_safeloader_parse,
|
||||
+ .cleanup = mtd_parser_tplink_safeloader_cleanup,
|
||||
+ .name = "tplink-safeloader",
|
||||
+ .of_match_table = mtd_parser_tplink_safeloader_of_match_table,
|
||||
+};
|
||||
+module_mtd_part_parser(mtd_parser_tplink_safeloader);
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
File diff suppressed because it is too large
Load Diff
@ -1,11 +0,0 @@
|
||||
--- a/net/ipv4/Kconfig
|
||||
+++ b/net/ipv4/Kconfig
|
||||
@@ -315,7 +315,7 @@ config NET_IPVTI
|
||||
on top.
|
||||
|
||||
config NET_UDP_TUNNEL
|
||||
- tristate
|
||||
+ tristate "IP: UDP tunneling support"
|
||||
select NET_IP_TUNNEL
|
||||
default n
|
||||
|
@ -1,23 +0,0 @@
|
||||
From 8c817e33be829c7249c2cfd59ff48ad5fac6a31d Mon Sep 17 00:00:00 2001
|
||||
From: Sungbo Eo <mans0n@gorani.run>
|
||||
Date: Fri, 7 Jul 2017 17:09:21 +0200
|
||||
Subject: [PATCH] kconfig: solidify SATA_PMP config
|
||||
|
||||
SATA_PMP option in kernel config file disappears for every kernel_oldconfig refresh.
|
||||
To prevent this, SATA_HOST is now selected automatically when SATA_PMP is enabled.
|
||||
This patch can be dropped if SATA_MV is ever re-added into the config.
|
||||
---
|
||||
drivers/ata/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/ata/Kconfig
|
||||
+++ b/drivers/ata/Kconfig
|
||||
@@ -112,7 +112,7 @@ config SATA_ZPODD
|
||||
|
||||
config SATA_PMP
|
||||
bool "SATA Port Multiplier support"
|
||||
- depends on SATA_HOST
|
||||
+ select SATA_HOST
|
||||
default y
|
||||
help
|
||||
This option adds support for SATA Port Multipliers
|
@ -1,22 +0,0 @@
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1954,7 +1954,7 @@ config PADATA
|
||||
bool
|
||||
|
||||
config ASN1
|
||||
- tristate
|
||||
+ tristate "ASN1"
|
||||
help
|
||||
Build a simple ASN.1 grammar compiler that produces a bytecode output
|
||||
that can be interpreted by the ASN.1 stream decoder and used to
|
||||
--- a/lib/Kconfig
|
||||
+++ b/lib/Kconfig
|
||||
@@ -627,7 +627,7 @@ config LIBFDT
|
||||
bool
|
||||
|
||||
config OID_REGISTRY
|
||||
- tristate
|
||||
+ tristate "OID"
|
||||
help
|
||||
Enable fast lookup object identifier registry.
|
||||
|
@ -1,15 +0,0 @@
|
||||
This makes it possible to select CONFIG_CRYPTO_LIB_ARC4 directly. We
|
||||
need this to be able to compile this into the kernel and make use of it
|
||||
from backports.
|
||||
|
||||
--- a/lib/crypto/Kconfig
|
||||
+++ b/lib/crypto/Kconfig
|
||||
@@ -6,7 +6,7 @@ config CRYPTO_LIB_AES
|
||||
tristate
|
||||
|
||||
config CRYPTO_LIB_ARC4
|
||||
- tristate
|
||||
+ tristate "ARC4 cipher library"
|
||||
|
||||
config CRYPTO_ARCH_HAVE_LIB_BLAKE2S
|
||||
bool
|
@ -1,38 +0,0 @@
|
||||
From: John Crispin <john@phrozen.org>
|
||||
Subject: hack: kernel: add generic image_cmdline hack to MIPS targets
|
||||
|
||||
lede-commit: d59f5b3a987a48508257a0ddbaeadc7909f9f976
|
||||
Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
|
||||
---
|
||||
arch/mips/Kconfig | 4 ++++
|
||||
arch/mips/kernel/head.S | 6 ++++++
|
||||
2 files changed, 10 insertions(+)
|
||||
|
||||
--- a/arch/mips/Kconfig
|
||||
+++ b/arch/mips/Kconfig
|
||||
@@ -1112,6 +1112,10 @@ config MIPS_MSC
|
||||
config SYNC_R4K
|
||||
bool
|
||||
|
||||
+config IMAGE_CMDLINE_HACK
|
||||
+ bool "OpenWrt specific image command line hack"
|
||||
+ default n
|
||||
+
|
||||
config NO_IOPORT_MAP
|
||||
def_bool n
|
||||
|
||||
--- a/arch/mips/kernel/head.S
|
||||
+++ b/arch/mips/kernel/head.S
|
||||
@@ -79,6 +79,12 @@ FEXPORT(__kernel_entry)
|
||||
j kernel_entry
|
||||
#endif /* CONFIG_BOOT_RAW */
|
||||
|
||||
+#ifdef CONFIG_IMAGE_CMDLINE_HACK
|
||||
+ .ascii "CMDLINE:"
|
||||
+EXPORT(__image_cmdline)
|
||||
+ .fill 0x400
|
||||
+#endif /* CONFIG_IMAGE_CMDLINE_HACK */
|
||||
+
|
||||
__REF
|
||||
|
||||
NESTED(kernel_entry, 16, sp) # kernel entry point
|
@ -1,23 +0,0 @@
|
||||
--- a/drivers/mtd/nand/Kconfig
|
||||
+++ b/drivers/mtd/nand/Kconfig
|
||||
@@ -61,6 +61,10 @@ config MTD_NAND_ECC_MEDIATEK
|
||||
help
|
||||
This enables support for the hardware ECC engine from Mediatek.
|
||||
|
||||
+config MTD_NAND_MTK_BMT
|
||||
+ bool "Support MediaTek NAND Bad-block Management Table"
|
||||
+ default n
|
||||
+
|
||||
endmenu
|
||||
|
||||
endmenu
|
||||
--- a/drivers/mtd/nand/Makefile
|
||||
+++ b/drivers/mtd/nand/Makefile
|
||||
@@ -3,6 +3,7 @@
|
||||
nandcore-objs := core.o bbt.o
|
||||
obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o
|
||||
obj-$(CONFIG_MTD_NAND_ECC_MEDIATEK) += ecc-mtk.o
|
||||
+obj-$(CONFIG_MTD_NAND_MTK_BMT) += mtk_bmt.o mtk_bmt_v2.o mtk_bmt_bbt.o mtk_bmt_nmbm.o
|
||||
|
||||
obj-y += onenand/
|
||||
obj-y += raw/
|
File diff suppressed because it is too large
Load Diff
@ -1,41 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Fri, 7 Jul 2017 17:18:54 +0200
|
||||
Subject: bridge: only accept EAP locally
|
||||
|
||||
When bridging, do not forward EAP frames to other ports, only deliver
|
||||
them locally, regardless of the state.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
[add disable_eap_hack sysfs attribute]
|
||||
Signed-off-by: Etienne Champetier <champetier.etienne@gmail.com>
|
||||
---
|
||||
|
||||
--- a/net/bridge/br_input.c
|
||||
+++ b/net/bridge/br_input.c
|
||||
@@ -133,10 +133,14 @@ int br_handle_frame_finish(struct net *n
|
||||
}
|
||||
}
|
||||
|
||||
+ BR_INPUT_SKB_CB(skb)->brdev = br->dev;
|
||||
+
|
||||
+ if (skb->protocol == htons(ETH_P_PAE) && !br->disable_eap_hack)
|
||||
+ return br_pass_frame_up(skb);
|
||||
+
|
||||
if (state == BR_STATE_LEARNING)
|
||||
goto drop;
|
||||
|
||||
- BR_INPUT_SKB_CB(skb)->brdev = br->dev;
|
||||
BR_INPUT_SKB_CB(skb)->src_port_isolated = !!(p->flags & BR_ISOLATED);
|
||||
|
||||
if (IS_ENABLED(CONFIG_INET) &&
|
||||
--- a/net/bridge/br_private.h
|
||||
+++ b/net/bridge/br_private.h
|
||||
@@ -482,6 +482,8 @@ struct net_bridge {
|
||||
u16 group_fwd_mask;
|
||||
u16 group_fwd_mask_required;
|
||||
|
||||
+ bool disable_eap_hack;
|
||||
+
|
||||
/* STP */
|
||||
bridge_id designated_root;
|
||||
bridge_id bridge_id;
|
@ -1,100 +0,0 @@
|
||||
From 1d418f7e88035ed7a94073f6354246c66e9193e9 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Fri, 7 Jul 2017 17:22:58 +0200
|
||||
Subject: fq_codel: switch default qdisc from pfifo_fast to fq_codel and remove pfifo_fast
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
include/net/sch_generic.h | 3 ++-
|
||||
net/sched/Kconfig | 3 ++-
|
||||
net/sched/sch_api.c | 2 +-
|
||||
net/sched/sch_fq_codel.c | 3 ++-
|
||||
net/sched/sch_generic.c | 4 ++--
|
||||
5 files changed, 9 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/include/net/sch_generic.h
|
||||
+++ b/include/net/sch_generic.h
|
||||
@@ -585,12 +585,13 @@ extern struct Qdisc_ops noop_qdisc_ops;
|
||||
extern struct Qdisc_ops pfifo_fast_ops;
|
||||
extern struct Qdisc_ops mq_qdisc_ops;
|
||||
extern struct Qdisc_ops noqueue_qdisc_ops;
|
||||
+extern struct Qdisc_ops fq_codel_qdisc_ops;
|
||||
extern const struct Qdisc_ops *default_qdisc_ops;
|
||||
static inline const struct Qdisc_ops *
|
||||
get_default_qdisc_ops(const struct net_device *dev, int ntx)
|
||||
{
|
||||
return ntx < dev->real_num_tx_queues ?
|
||||
- default_qdisc_ops : &pfifo_fast_ops;
|
||||
+ default_qdisc_ops : &fq_codel_qdisc_ops;
|
||||
}
|
||||
|
||||
struct Qdisc_class_common {
|
||||
--- a/net/sched/Kconfig
|
||||
+++ b/net/sched/Kconfig
|
||||
@@ -4,8 +4,9 @@
|
||||
#
|
||||
|
||||
menuconfig NET_SCHED
|
||||
- bool "QoS and/or fair queueing"
|
||||
+ def_bool y
|
||||
select NET_SCH_FIFO
|
||||
+ select NET_SCH_FQ_CODEL
|
||||
help
|
||||
When the kernel has several packets to send out over a network
|
||||
device, it has to decide which ones to send first, which ones to
|
||||
--- a/net/sched/sch_api.c
|
||||
+++ b/net/sched/sch_api.c
|
||||
@@ -2277,7 +2277,7 @@ static int __init pktsched_init(void)
|
||||
return err;
|
||||
}
|
||||
|
||||
- register_qdisc(&pfifo_fast_ops);
|
||||
+ register_qdisc(&fq_codel_qdisc_ops);
|
||||
register_qdisc(&pfifo_qdisc_ops);
|
||||
register_qdisc(&bfifo_qdisc_ops);
|
||||
register_qdisc(&pfifo_head_drop_qdisc_ops);
|
||||
--- a/net/sched/sch_fq_codel.c
|
||||
+++ b/net/sched/sch_fq_codel.c
|
||||
@@ -719,7 +719,7 @@ static const struct Qdisc_class_ops fq_c
|
||||
.walk = fq_codel_walk,
|
||||
};
|
||||
|
||||
-static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
|
||||
+struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
|
||||
.cl_ops = &fq_codel_class_ops,
|
||||
.id = "fq_codel",
|
||||
.priv_size = sizeof(struct fq_codel_sched_data),
|
||||
@@ -734,6 +734,7 @@ static struct Qdisc_ops fq_codel_qdisc_o
|
||||
.dump_stats = fq_codel_dump_stats,
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
+EXPORT_SYMBOL(fq_codel_qdisc_ops);
|
||||
|
||||
static int __init fq_codel_module_init(void)
|
||||
{
|
||||
--- a/net/sched/sch_generic.c
|
||||
+++ b/net/sched/sch_generic.c
|
||||
@@ -32,7 +32,7 @@
|
||||
#include <net/xfrm.h>
|
||||
|
||||
/* Qdisc to use by default */
|
||||
-const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
|
||||
+const struct Qdisc_ops *default_qdisc_ops = &fq_codel_qdisc_ops;
|
||||
EXPORT_SYMBOL(default_qdisc_ops);
|
||||
|
||||
static void qdisc_maybe_clear_missed(struct Qdisc *q,
|
||||
@@ -1142,12 +1142,12 @@ static void attach_one_default_qdisc(str
|
||||
void *_unused)
|
||||
{
|
||||
struct Qdisc *qdisc;
|
||||
- const struct Qdisc_ops *ops = default_qdisc_ops;
|
||||
+ const struct Qdisc_ops *ops = &fq_codel_qdisc_ops;
|
||||
|
||||
if (dev->priv_flags & IFF_NO_QUEUE)
|
||||
ops = &noqueue_qdisc_ops;
|
||||
else if(dev->type == ARPHRD_CAN)
|
||||
- ops = &pfifo_fast_ops;
|
||||
+ ops = &fq_codel_qdisc_ops;
|
||||
|
||||
qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
|
||||
if (!qdisc)
|
@ -1,12 +0,0 @@
|
||||
--- a/drivers/of/fdt.c
|
||||
+++ b/drivers/of/fdt.c
|
||||
@@ -1179,6 +1179,9 @@ int __init early_init_dt_scan_chosen(cha
|
||||
p = of_get_flat_dt_prop(node, "bootargs", &l);
|
||||
if (p != NULL && l > 0)
|
||||
strlcpy(cmdline, p, min(l, COMMAND_LINE_SIZE));
|
||||
+ p = of_get_flat_dt_prop(node, "bootargs-append", &l);
|
||||
+ if (p != NULL && l > 0)
|
||||
+ strlcat(cmdline, p, min_t(int, strlen(cmdline) + (int)l, COMMAND_LINE_SIZE));
|
||||
|
||||
/*
|
||||
* CONFIG_CMDLINE is meant to be a default in case nothing else
|
@ -14,7 +14,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
|
||||
--- a/include/linux/module.h
|
||||
+++ b/include/linux/module.h
|
||||
@@ -164,6 +164,7 @@ extern void cleanup_module(void);
|
||||
@@ -163,6 +163,7 @@ extern void cleanup_module(void);
|
||||
|
||||
/* Generic info of form tag = "info" */
|
||||
#define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info)
|
||||
@ -22,7 +22,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
|
||||
/* For userspace: you can also call me... */
|
||||
#define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias)
|
||||
@@ -233,12 +234,12 @@ extern void cleanup_module(void);
|
||||
@@ -232,12 +233,12 @@ extern void cleanup_module(void);
|
||||
* Author(s), use "Name <email>" or just "Name", for multiple
|
||||
* authors use multiple MODULE_AUTHOR() statements/lines.
|
||||
*/
|
||||
@ -38,7 +38,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
/* Creates an alias so file2alias.c can find device table. */
|
||||
#define MODULE_DEVICE_TABLE(type, name) \
|
||||
extern typeof(name) __mod_##type##__##name##_device_table \
|
||||
@@ -265,7 +266,9 @@ extern typeof(name) __mod_##type##__##na
|
||||
@@ -264,7 +265,9 @@ extern typeof(name) __mod_##type##__##na
|
||||
*/
|
||||
|
||||
#if defined(MODULE) || !defined(CONFIG_SYSFS)
|
||||
@ -49,7 +49,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
#else
|
||||
#define MODULE_VERSION(_version) \
|
||||
MODULE_INFO(version, _version); \
|
||||
@@ -288,7 +291,7 @@ extern typeof(name) __mod_##type##__##na
|
||||
@@ -287,7 +290,7 @@ extern typeof(name) __mod_##type##__##na
|
||||
/* Optional firmware file (or files) needed by the module
|
||||
* format is simply firmware file name. Multiple firmware
|
||||
* files require multiple MODULE_FIRMWARE() specifiers */
|
||||
@ -88,9 +88,9 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
|
||||
--- a/kernel/module/Kconfig
|
||||
+++ b/kernel/module/Kconfig
|
||||
@@ -286,6 +286,13 @@ config UNUSED_KSYMS_WHITELIST
|
||||
one per line. The path can be absolute, or relative to the kernel
|
||||
source tree.
|
||||
@@ -290,4 +290,11 @@ config MODULES_TREE_LOOKUP
|
||||
def_bool y
|
||||
depends on PERF_EVENTS || TRACING || CFI_CLANG
|
||||
|
||||
+config MODULE_STRIPPED
|
||||
+ bool "Reduce module size"
|
||||
@ -99,12 +99,26 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
+ Remove module parameter descriptions, author info, version, aliases,
|
||||
+ device tables, etc.
|
||||
+
|
||||
config MODULES_TREE_LOOKUP
|
||||
def_bool y
|
||||
depends on PERF_EVENTS || TRACING || CFI_CLANG
|
||||
endif # MODULES
|
||||
--- a/kernel/module/main.c
|
||||
+++ b/kernel/module/main.c
|
||||
@@ -1954,9 +1954,11 @@ static int setup_load_info(struct load_i
|
||||
@@ -988,6 +988,7 @@ size_t modinfo_attrs_count = ARRAY_SIZE(
|
||||
|
||||
static const char vermagic[] = VERMAGIC_STRING;
|
||||
|
||||
+#if defined(CONFIG_MODVERSIONS) || !defined(CONFIG_MODULE_STRIPPED)
|
||||
int try_to_force_load(struct module *mod, const char *reason)
|
||||
{
|
||||
#ifdef CONFIG_MODULE_FORCE_LOAD
|
||||
@@ -999,6 +1000,7 @@ int try_to_force_load(struct module *mod
|
||||
return -ENOEXEC;
|
||||
#endif
|
||||
}
|
||||
+#endif
|
||||
|
||||
static char *get_modinfo(const struct load_info *info, const char *tag);
|
||||
static char *get_next_modinfo(const struct load_info *info, const char *tag,
|
||||
@@ -1950,9 +1952,11 @@ static int setup_load_info(struct load_i
|
||||
|
||||
static int check_modinfo(struct module *mod, struct load_info *info, int flags)
|
||||
{
|
||||
@ -117,7 +131,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
if (flags & MODULE_INIT_IGNORE_VERMAGIC)
|
||||
modmagic = NULL;
|
||||
|
||||
@@ -1977,6 +1979,7 @@ static int check_modinfo(struct module *
|
||||
@@ -1973,6 +1977,7 @@ static int check_modinfo(struct module *
|
||||
mod->name);
|
||||
add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
|
||||
}
|
||||
@ -148,7 +162,29 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
buf_printf(b, "\n");
|
||||
buf_printf(b, "__visible struct module __this_module\n");
|
||||
buf_printf(b, "__section(\".gnu.linkonce.this_module\") = {\n");
|
||||
@@ -2101,11 +2105,13 @@ static void add_depends(struct buffer *b
|
||||
@@ -1995,8 +1999,10 @@ static void add_header(struct buffer *b,
|
||||
buf_printf(b, "\t.arch = MODULE_ARCH_INIT,\n");
|
||||
buf_printf(b, "};\n");
|
||||
|
||||
+#ifndef CONFIG_MODULE_STRIPPED
|
||||
if (!external_module)
|
||||
buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n");
|
||||
+#endif
|
||||
|
||||
buf_printf(b,
|
||||
"\n"
|
||||
@@ -2004,8 +2010,10 @@ static void add_header(struct buffer *b,
|
||||
"MODULE_INFO(retpoline, \"Y\");\n"
|
||||
"#endif\n");
|
||||
|
||||
+#ifndef CONFIG_MODULE_STRIPPED
|
||||
if (strstarts(mod->name, "drivers/staging"))
|
||||
buf_printf(b, "\nMODULE_INFO(staging, \"Y\");\n");
|
||||
+#endif
|
||||
|
||||
if (strstarts(mod->name, "tools/testing"))
|
||||
buf_printf(b, "\nMODULE_INFO(test, \"Y\");\n");
|
||||
@@ -2101,11 +2109,13 @@ static void add_depends(struct buffer *b
|
||||
|
||||
static void add_srcversion(struct buffer *b, struct module *mod)
|
||||
{
|
||||
@ -162,7 +198,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
}
|
||||
|
||||
static void write_buf(struct buffer *b, const char *fname)
|
||||
@@ -2191,7 +2197,9 @@ static void write_mod_c_file(struct modu
|
||||
@@ -2191,7 +2201,9 @@ static void write_mod_c_file(struct modu
|
||||
add_exported_symbols(&buf, mod);
|
||||
add_versions(&buf, mod);
|
||||
add_depends(&buf, mod);
|
@ -1,3 +1,12 @@
|
||||
From 300d26562ce4dc427154cb247beb75db4b1f0774 Mon Sep 17 00:00:00 2001
|
||||
From: OpenWrt community <openwrt-devel@lists.openwrt.org>
|
||||
Date: Wed, 13 Jul 2022 13:29:57 +0200
|
||||
Subject: [PATCH] scripts/Kconfig: Kconfig exit
|
||||
|
||||
---
|
||||
scripts/kconfig/conf.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
--- a/scripts/kconfig/conf.c
|
||||
+++ b/scripts/kconfig/conf.c
|
||||
@@ -432,6 +432,8 @@ static int conf_sym(struct menu *menu)
|
@ -81,3 +81,13 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
|
||||
struct page;
|
||||
struct kmem_cache;
|
||||
--- a/tools/perf/pmu-events/jevents.py
|
||||
+++ b/tools/perf/pmu-events/jevents.py
|
||||
@@ -684,6 +684,7 @@ def main() -> None:
|
||||
#include "util/header.h"
|
||||
#include "util/pmu.h"
|
||||
#include <string.h>
|
||||
+#include <strings.h>
|
||||
#include <stddef.h>
|
||||
|
||||
struct compact_pmu_event {
|
@ -12,7 +12,7 @@ Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
|
||||
---
|
||||
--- a/arch/arm/Kconfig
|
||||
+++ b/arch/arm/Kconfig
|
||||
@@ -121,6 +121,7 @@ config ARM
|
||||
@@ -122,6 +122,7 @@ config ARM
|
||||
select HAVE_UID16
|
||||
select HAVE_VIRT_CPU_ACCOUNTING_GEN
|
||||
select IRQ_FORCED_THREADING
|
@ -30,7 +30,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
/* Align . to a 8 byte boundary equals to maximum function alignment. */
|
||||
#define ALIGN_FUNCTION() . = ALIGN(8)
|
||||
|
||||
@@ -479,14 +489,14 @@
|
||||
@@ -512,14 +522,14 @@
|
||||
/* Kernel symbol table: Normal symbols */ \
|
||||
__ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \
|
||||
__start___ksymtab = .; \
|
||||
@ -47,7 +47,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
__stop___ksymtab_gpl = .; \
|
||||
} \
|
||||
\
|
||||
@@ -506,7 +516,7 @@
|
||||
@@ -539,7 +549,7 @@
|
||||
\
|
||||
/* Kernel symbol table: strings */ \
|
||||
__ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
|
||||
@ -56,10 +56,10 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
} \
|
||||
\
|
||||
/* __*init sections */ \
|
||||
@@ -1023,6 +1033,8 @@
|
||||
|
||||
@@ -1043,6 +1053,8 @@
|
||||
#define COMMON_DISCARDS \
|
||||
SANITIZER_DISCARDS \
|
||||
PATCHABLE_DISCARDS \
|
||||
+ SYMTAB_DISCARD \
|
||||
+ SYMTAB_DISCARD_GPL \
|
||||
*(.discard) \
|
||||
@ -89,9 +89,33 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
"__kstrtab_" #sym ": \n" \
|
||||
" .asciz \"" #sym "\" \n" \
|
||||
"__kstrtabns_" #sym ": \n" \
|
||||
--- a/include/asm-generic/export.h
|
||||
+++ b/include/asm-generic/export.h
|
||||
@@ -31,6 +31,12 @@
|
||||
#endif
|
||||
.endm
|
||||
|
||||
+#ifdef MODULE
|
||||
+#define __EXPORT_SUFFIX(name)
|
||||
+#else
|
||||
+#define __EXPORT_SUFFIX(name) + #name
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
|
||||
* section flag requires it. Use '%progbits' instead of '@progbits' since the
|
||||
@@ -44,7 +50,7 @@
|
||||
__ksymtab_\name:
|
||||
__put \val, __kstrtab_\name
|
||||
.previous
|
||||
- .section __ksymtab_strings,"aMS",%progbits,1
|
||||
+ .section __ksymtab_strings __EXPORT_SUFFIX(name),"aMS",%progbits,1
|
||||
__kstrtab_\name:
|
||||
.asciz "\name"
|
||||
.previous
|
||||
--- a/scripts/Makefile.build
|
||||
+++ b/scripts/Makefile.build
|
||||
@@ -328,7 +328,7 @@ targets += $(real-dtb-y) $(lib-y) $(alwa
|
||||
@@ -388,7 +388,7 @@ targets += $(real-dtb-y) $(lib-y) $(alwa
|
||||
# Linker scripts preprocessor (.lds.S -> .lds)
|
||||
# ---------------------------------------------------------------------------
|
||||
quiet_cmd_cpp_lds_S = LDS $@
|
@ -23,12 +23,16 @@ Signed-off-by: Imre Kaloz <kaloz@openwrt.org>
|
||||
{ {0x02, 0x21}, "lz4", unlz4 },
|
||||
--- a/scripts/Makefile.lib
|
||||
+++ b/scripts/Makefile.lib
|
||||
@@ -421,7 +421,7 @@ quiet_cmd_bzip2_with_size = BZIP2 $@
|
||||
@@ -443,10 +443,10 @@ quiet_cmd_bzip2_with_size = BZIP2 $@
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
quiet_cmd_lzma = LZMA $@
|
||||
- cmd_lzma = cat $(real-prereqs) | $(LZMA) -9 > $@
|
||||
+ cmd_lzma = { cat $(real-prereqs) | $(LZMA) e -d20 -lc1 -lp2 -pb2 -eos -si -so; $(size_append); } > $@
|
||||
+ cmd_lzma = cat $(real-prereqs) | $(LZMA) e -d20 -lc1 -lp2 -pb2 -eos -si -so > $@
|
||||
|
||||
quiet_cmd_lzma_with_size = LZMA $@
|
||||
cmd_lzma_with_size = { cat $(real-prereqs) | $(LZMA) -9; $(size_append); } > $@
|
||||
- cmd_lzma_with_size = { cat $(real-prereqs) | $(LZMA) -9; $(size_append); } > $@
|
||||
+ cmd_lzma_with_size = { cat $(real-prereqs) | $(LZMA) e -d20 -lc1 -lp2 -pb2 -eos -si -so; $(size_append); } > $@
|
||||
|
||||
quiet_cmd_lzo = LZO $@
|
||||
cmd_lzo = cat $(real-prereqs) | $(KLZOP) -9 > $@
|
@ -92,7 +92,7 @@ Signed-off-by: John Crispin <john@phrozen.org>
|
||||
bool
|
||||
--- a/lib/Kconfig
|
||||
+++ b/lib/Kconfig
|
||||
@@ -456,16 +456,16 @@ config BCH_CONST_T
|
||||
@@ -457,16 +457,16 @@ config BCH_CONST_T
|
||||
# Textsearch support is select'ed if needed
|
||||
#
|
||||
config TEXTSEARCH
|
||||
@ -147,7 +147,7 @@ Signed-off-by: John Crispin <john@phrozen.org>
|
||||
|
||||
config CFG80211
|
||||
tristate "cfg80211 - wireless configuration API"
|
||||
@@ -204,7 +204,7 @@ config CFG80211_WEXT_EXPORT
|
||||
@@ -208,7 +208,7 @@ config CFG80211_WEXT_EXPORT
|
||||
endif # CFG80211
|
||||
|
||||
config LIB80211
|
||||
@ -156,7 +156,7 @@ Signed-off-by: John Crispin <john@phrozen.org>
|
||||
default n
|
||||
help
|
||||
This options enables a library of common routines used
|
||||
@@ -213,17 +213,17 @@ config LIB80211
|
||||
@@ -217,17 +217,17 @@ config LIB80211
|
||||
Drivers should select this themselves if needed.
|
||||
|
||||
config LIB80211_CRYPT_WEP
|
32
target/linux/generic/hack-6.1/253-ksmbd-config.patch
Normal file
32
target/linux/generic/hack-6.1/253-ksmbd-config.patch
Normal file
@ -0,0 +1,32 @@
|
||||
From dcd966fa7ca63f38cf7147e1184d13d66e2ca340 Mon Sep 17 00:00:00 2001
|
||||
From: OpenWrt community <openwrt-devel@lists.openwrt.org>
|
||||
Date: Wed, 13 Jul 2022 13:33:30 +0200
|
||||
Subject: [PATCH] Kconfig: add tristate for OID and ASN1 string
|
||||
|
||||
---
|
||||
init/Kconfig | 2 +-
|
||||
lib/Kconfig | 2 +-
|
||||
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -2003,7 +2003,7 @@ config PADATA
|
||||
bool
|
||||
|
||||
config ASN1
|
||||
- tristate
|
||||
+ tristate "ASN1"
|
||||
help
|
||||
Build a simple ASN.1 grammar compiler that produces a bytecode output
|
||||
that can be interpreted by the ASN.1 stream decoder and used to
|
||||
--- a/lib/Kconfig
|
||||
+++ b/lib/Kconfig
|
||||
@@ -637,7 +637,7 @@ config LIBFDT
|
||||
bool
|
||||
|
||||
config OID_REGISTRY
|
||||
- tristate
|
||||
+ tristate "OID"
|
||||
help
|
||||
Enable fast lookup object identifier registry.
|
||||
|
@ -125,7 +125,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/property.h>
|
||||
@@ -3384,3 +3385,5 @@ static int __init regmap_initcall(void)
|
||||
@@ -3505,3 +3506,5 @@ static int __init regmap_initcall(void)
|
||||
return 0;
|
||||
}
|
||||
postcore_initcall(regmap_initcall);
|
24
target/linux/generic/hack-6.1/261-lib-arc4-unhide.patch
Normal file
24
target/linux/generic/hack-6.1/261-lib-arc4-unhide.patch
Normal file
@ -0,0 +1,24 @@
|
||||
From 241e5d3f7b0dd3c01f8c7fa83cbc9a3882286d53 Mon Sep 17 00:00:00 2001
|
||||
From: OpenWrt community <openwrt-devel@lists.openwrt.org>
|
||||
Date: Wed, 13 Jul 2022 13:35:18 +0200
|
||||
Subject: [PATCH] lib/crypto: add tristate string for ARC4
|
||||
|
||||
This makes it possible to select CONFIG_CRYPTO_LIB_ARC4 directly. We
|
||||
need this to be able to compile this into the kernel and make use of it
|
||||
from backports.
|
||||
|
||||
---
|
||||
lib/crypto/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/lib/crypto/Kconfig
|
||||
+++ b/lib/crypto/Kconfig
|
||||
@@ -9,7 +9,7 @@ config CRYPTO_LIB_AES
|
||||
tristate
|
||||
|
||||
config CRYPTO_LIB_ARC4
|
||||
- tristate
|
||||
+ tristate "ARC4 cipher library"
|
||||
|
||||
config CRYPTO_ARCH_HAVE_LIB_BLAKE2S
|
||||
bool
|
@ -26,7 +26,7 @@ Signed-off-by: John Crispin <john@phrozen.org>
|
||||
* @name: name of the struct -- the string is not copied internally
|
||||
--- a/net/Makefile
|
||||
+++ b/net/Makefile
|
||||
@@ -52,7 +52,7 @@ obj-$(CONFIG_TIPC) += tipc/
|
||||
@@ -51,7 +51,7 @@ obj-$(CONFIG_TIPC) += tipc/
|
||||
obj-$(CONFIG_NETLABEL) += netlabel/
|
||||
obj-$(CONFIG_IUCV) += iucv/
|
||||
obj-$(CONFIG_SMC) += smc/
|
@ -0,0 +1,112 @@
|
||||
From 0bccc3722bdd88e8ae995e77ef9f7b77ee4cbdee Mon Sep 17 00:00:00 2001
|
||||
From: Daniel Golle <daniel@makrotopia.org>
|
||||
Date: Wed, 7 Apr 2021 22:45:54 +0100
|
||||
Subject: [PATCH 2/2] mtd: blktrans: call add disks after mtd device
|
||||
To: linux-mtd@lists.infradead.org
|
||||
Cc: Vignesh Raghavendra <vigneshr@ti.com>,
|
||||
Richard Weinberger <richard@nod.at>,
|
||||
Miquel Raynal <miquel.raynal@bootlin.com>,
|
||||
David Woodhouse <dwmw2@infradead.org>
|
||||
|
||||
Calling device_add_disk while holding mtd_table_mutex leads
|
||||
to deadlock in case part_bits!=0 as block partition parsers
|
||||
will try to open the newly created disks, trying to acquire
|
||||
mutex once again.
|
||||
Move device_add_disk to an additional function called after
|
||||
the partitions of an MTD device have been added and locks
|
||||
have been released.
|
||||
|
||||
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
|
||||
---
|
||||
drivers/mtd/mtd_blkdevs.c | 33 ++++++++++++++++++++++++++-------
|
||||
drivers/mtd/mtdcore.c | 3 +++
|
||||
include/linux/mtd/blktrans.h | 1 +
|
||||
3 files changed, 30 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/drivers/mtd/mtd_blkdevs.c
|
||||
+++ b/drivers/mtd/mtd_blkdevs.c
|
||||
@@ -386,19 +386,8 @@ int add_mtd_blktrans_dev(struct mtd_blkt
|
||||
if (new->readonly)
|
||||
set_disk_ro(gd, 1);
|
||||
|
||||
- ret = device_add_disk(&new->mtd->dev, gd, NULL);
|
||||
- if (ret)
|
||||
- goto out_cleanup_disk;
|
||||
-
|
||||
- if (new->disk_attributes) {
|
||||
- ret = sysfs_create_group(&disk_to_dev(gd)->kobj,
|
||||
- new->disk_attributes);
|
||||
- WARN_ON(ret);
|
||||
- }
|
||||
return 0;
|
||||
|
||||
-out_cleanup_disk:
|
||||
- put_disk(new->disk);
|
||||
out_free_tag_set:
|
||||
blk_mq_free_tag_set(new->tag_set);
|
||||
out_kfree_tag_set:
|
||||
@@ -408,6 +397,35 @@ out_list_del:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+void register_mtd_blktrans_devs(void)
|
||||
+{
|
||||
+ struct mtd_blktrans_ops *tr;
|
||||
+ struct mtd_blktrans_dev *dev, *next;
|
||||
+ int ret;
|
||||
+
|
||||
+ list_for_each_entry(tr, &blktrans_majors, list) {
|
||||
+ list_for_each_entry_safe(dev, next, &tr->devs, list) {
|
||||
+ if (disk_live(dev->disk))
|
||||
+ continue;
|
||||
+
|
||||
+ ret = device_add_disk(&dev->mtd->dev, dev->disk, NULL);
|
||||
+ if (ret)
|
||||
+ goto out_cleanup_disk;
|
||||
+
|
||||
+ if (dev->disk_attributes) {
|
||||
+ ret = sysfs_create_group(&disk_to_dev(dev->disk)->kobj,
|
||||
+ dev->disk_attributes);
|
||||
+ WARN_ON(ret);
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return;
|
||||
+
|
||||
+out_cleanup_disk:
|
||||
+ put_disk(dev->disk);
|
||||
+}
|
||||
+
|
||||
int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
|
||||
{
|
||||
unsigned long flags;
|
||||
--- a/drivers/mtd/mtdcore.c
|
||||
+++ b/drivers/mtd/mtdcore.c
|
||||
@@ -31,6 +31,7 @@
|
||||
|
||||
#include <linux/mtd/mtd.h>
|
||||
#include <linux/mtd/partitions.h>
|
||||
+#include <linux/mtd/blktrans.h>
|
||||
|
||||
#include "mtdcore.h"
|
||||
|
||||
@@ -1057,6 +1058,8 @@ int mtd_device_parse_register(struct mtd
|
||||
|
||||
ret = mtd_otp_nvmem_add(mtd);
|
||||
|
||||
+ register_mtd_blktrans_devs();
|
||||
+
|
||||
out:
|
||||
if (ret && device_is_registered(&mtd->dev))
|
||||
del_mtd_device(mtd);
|
||||
--- a/include/linux/mtd/blktrans.h
|
||||
+++ b/include/linux/mtd/blktrans.h
|
||||
@@ -76,6 +76,7 @@ extern int deregister_mtd_blktrans(struc
|
||||
extern int add_mtd_blktrans_dev(struct mtd_blktrans_dev *dev);
|
||||
extern int del_mtd_blktrans_dev(struct mtd_blktrans_dev *dev);
|
||||
extern int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev);
|
||||
+extern void register_mtd_blktrans_devs(void);
|
||||
|
||||
/**
|
||||
* module_mtd_blktrans() - Helper macro for registering a mtd blktrans driver
|
@ -1,6 +1,25 @@
|
||||
From 69357074558daf6ff24c9f58714935e9e095a865 Mon Sep 17 00:00:00 2001
|
||||
From: OpenWrt community <openwrt-devel@lists.openwrt.org>
|
||||
Date: Wed, 13 Jul 2022 13:37:33 +0200
|
||||
Subject: [PATCH] kernel: add block fit partition parser
|
||||
|
||||
---
|
||||
block/blk.h | 2 ++
|
||||
block/partitions/Kconfig | 7 +++++++
|
||||
block/partitions/Makefile | 1 +
|
||||
block/partitions/check.h | 3 +++
|
||||
block/partitions/core.c | 17 +++++++++++++++++
|
||||
block/partitions/efi.c | 8 ++++++++
|
||||
block/partitions/efi.h | 3 +++
|
||||
block/partitions/msdos.c | 10 ++++++++++
|
||||
drivers/mtd/mtd_blkdevs.c | 2 ++
|
||||
drivers/mtd/ubi/block.c | 3 +++
|
||||
include/linux/msdos_partition.h | 1 +
|
||||
11 files changed, 57 insertions(+)
|
||||
|
||||
--- a/block/blk.h
|
||||
+++ b/block/blk.h
|
||||
@@ -406,6 +406,8 @@ void blk_free_ext_minor(unsigned int min
|
||||
@@ -414,6 +414,8 @@ void blk_free_ext_minor(unsigned int min
|
||||
#define ADDPART_FLAG_NONE 0
|
||||
#define ADDPART_FLAG_RAID 1
|
||||
#define ADDPART_FLAG_WHOLEDISK 2
|
||||
@ -98,30 +117,6 @@
|
||||
return true;
|
||||
}
|
||||
|
||||
--- a/drivers/mtd/ubi/block.c
|
||||
+++ b/drivers/mtd/ubi/block.c
|
||||
@@ -433,6 +433,9 @@ int ubiblock_create(struct ubi_volume_in
|
||||
}
|
||||
gd->flags |= GENHD_FL_NO_PART;
|
||||
gd->private_data = dev;
|
||||
+#ifdef CONFIG_FIT_PARTITION
|
||||
+ gd->flags |= GENHD_FL_EXT_DEVT;
|
||||
+#endif
|
||||
sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id);
|
||||
set_capacity(gd, disk_capacity);
|
||||
dev->gd = gd;
|
||||
--- a/drivers/mtd/mtd_blkdevs.c
|
||||
+++ b/drivers/mtd/mtd_blkdevs.c
|
||||
@@ -346,6 +346,9 @@ int add_mtd_blktrans_dev(struct mtd_blkt
|
||||
gd->first_minor = (new->devnum) << tr->part_bits;
|
||||
gd->minors = 1 << tr->part_bits;
|
||||
gd->fops = &mtd_block_ops;
|
||||
+#ifdef CONFIG_FIT_PARTITION
|
||||
+ gd->flags |= GENHD_FL_EXT_DEVT;
|
||||
+#endif
|
||||
|
||||
if (tr->part_bits) {
|
||||
if (new->devnum < 26)
|
||||
--- a/block/partitions/efi.c
|
||||
+++ b/block/partitions/efi.c
|
||||
@@ -716,6 +716,9 @@ int efi_partition(struct parsed_partitio
|
@ -20,7 +20,7 @@ Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
|
||||
#include <linux/nvmem-provider.h>
|
||||
|
||||
#include <linux/mtd/mtd.h>
|
||||
@@ -748,6 +749,16 @@ int add_mtd_device(struct mtd_info *mtd)
|
||||
@@ -751,6 +752,16 @@ int add_mtd_device(struct mtd_info *mtd)
|
||||
of this try_ nonsense, and no bitching about it
|
||||
either. :) */
|
||||
__module_get(THIS_MODULE);
|
33
target/linux/generic/hack-6.1/430-mtk-bmt-support.patch
Normal file
33
target/linux/generic/hack-6.1/430-mtk-bmt-support.patch
Normal file
@ -0,0 +1,33 @@
|
||||
From ac84397efb3b3868c71c10ad7521161773228a17 Mon Sep 17 00:00:00 2001
|
||||
From: OpenWrt community <openwrt-devel@lists.openwrt.org>
|
||||
Date: Wed, 13 Jul 2022 13:41:44 +0200
|
||||
Subject: [PATCH] mtd/nand: add MediaTek NAND bad block management table
|
||||
|
||||
---
|
||||
drivers/mtd/nand/Kconfig | 4 ++++
|
||||
drivers/mtd/nand/Makefile | 1 +
|
||||
2 files changed, 5 insertions(+)
|
||||
|
||||
--- a/drivers/mtd/nand/Kconfig
|
||||
+++ b/drivers/mtd/nand/Kconfig
|
||||
@@ -46,6 +46,10 @@ config MTD_NAND_ECC_SW_BCH
|
||||
ECC codes. They are used with NAND devices requiring more than 1 bit
|
||||
of error correction.
|
||||
|
||||
+config MTD_NAND_MTK_BMT
|
||||
+ bool "Support MediaTek NAND Bad-block Management Table"
|
||||
+ default n
|
||||
+
|
||||
config MTD_NAND_ECC_MXIC
|
||||
bool "Macronix external hardware ECC engine"
|
||||
depends on HAS_IOMEM
|
||||
--- a/drivers/mtd/nand/Makefile
|
||||
+++ b/drivers/mtd/nand/Makefile
|
||||
@@ -3,6 +3,7 @@
|
||||
nandcore-objs := core.o bbt.o
|
||||
obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o
|
||||
obj-$(CONFIG_MTD_NAND_ECC_MEDIATEK) += ecc-mtk.o
|
||||
+obj-$(CONFIG_MTD_NAND_MTK_BMT) += mtk_bmt.o mtk_bmt_v2.o mtk_bmt_bbt.o mtk_bmt_nmbm.o
|
||||
|
||||
obj-y += onenand/
|
||||
obj-y += raw/
|
846
target/linux/generic/hack-6.1/600-bridge_offload.patch
Normal file
846
target/linux/generic/hack-6.1/600-bridge_offload.patch
Normal file
@ -0,0 +1,846 @@
|
||||
From 11c3fae5afa6cac444d12622e2cf5af60a99c1ef Mon Sep 17 00:00:00 2001
|
||||
From: OpenWrt community <openwrt-devel@lists.openwrt.org>
|
||||
Date: Wed, 13 Jul 2022 13:43:15 +0200
|
||||
Subject: [PATCH] net/bridge: add bridge offload
|
||||
|
||||
---
|
||||
include/linux/if_bridge.h | 1 +
|
||||
net/bridge/Makefile | 2 +-
|
||||
net/bridge/br.c | 8 +
|
||||
net/bridge/br_device.c | 2 +
|
||||
net/bridge/br_fdb.c | 5 +
|
||||
net/bridge/br_forward.c | 3 +
|
||||
net/bridge/br_if.c | 6 +-
|
||||
net/bridge/br_input.c | 5 +
|
||||
net/bridge/br_offload.c | 438 ++++++++++++++++++++++++++++++++
|
||||
net/bridge/br_private.h | 22 +-
|
||||
net/bridge/br_private_offload.h | 23 ++
|
||||
net/bridge/br_stp.c | 3 +
|
||||
net/bridge/br_sysfs_br.c | 35 +++
|
||||
net/bridge/br_sysfs_if.c | 2 +
|
||||
net/bridge/br_vlan_tunnel.c | 3 +
|
||||
15 files changed, 555 insertions(+), 3 deletions(-)
|
||||
create mode 100644 net/bridge/br_offload.c
|
||||
create mode 100644 net/bridge/br_private_offload.h
|
||||
|
||||
--- a/include/linux/if_bridge.h
|
||||
+++ b/include/linux/if_bridge.h
|
||||
@@ -60,6 +60,7 @@ struct br_ip_list {
|
||||
#define BR_TX_FWD_OFFLOAD BIT(20)
|
||||
#define BR_PORT_LOCKED BIT(21)
|
||||
#define BR_BPDU_FILTER BIT(22)
|
||||
+#define BR_OFFLOAD BIT(23)
|
||||
|
||||
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
|
||||
|
||||
--- a/net/bridge/Makefile
|
||||
+++ b/net/bridge/Makefile
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
obj-$(CONFIG_BRIDGE) += bridge.o
|
||||
|
||||
-bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
|
||||
+bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o br_offload.o \
|
||||
br_ioctl.o br_stp.o br_stp_bpdu.o \
|
||||
br_stp_if.o br_stp_timer.o br_netlink.o \
|
||||
br_netlink_tunnel.o br_arp_nd_proxy.o
|
||||
--- a/net/bridge/br.c
|
||||
+++ b/net/bridge/br.c
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <net/switchdev.h>
|
||||
|
||||
#include "br_private.h"
|
||||
+#include "br_private_offload.h"
|
||||
|
||||
/*
|
||||
* Handle changes in state of network devices enslaved to a bridge.
|
||||
@@ -389,6 +390,10 @@ static int __init br_init(void)
|
||||
if (err)
|
||||
goto err_out;
|
||||
|
||||
+ err = br_offload_init();
|
||||
+ if (err)
|
||||
+ goto err_out0;
|
||||
+
|
||||
err = register_pernet_subsys(&br_net_ops);
|
||||
if (err)
|
||||
goto err_out1;
|
||||
@@ -438,6 +443,8 @@ err_out3:
|
||||
err_out2:
|
||||
unregister_pernet_subsys(&br_net_ops);
|
||||
err_out1:
|
||||
+ br_offload_fini();
|
||||
+err_out0:
|
||||
br_fdb_fini();
|
||||
err_out:
|
||||
stp_proto_unregister(&br_stp_proto);
|
||||
@@ -460,6 +467,7 @@ static void __exit br_deinit(void)
|
||||
#if IS_ENABLED(CONFIG_ATM_LANE)
|
||||
br_fdb_test_addr_hook = NULL;
|
||||
#endif
|
||||
+ br_offload_fini();
|
||||
br_fdb_fini();
|
||||
}
|
||||
|
||||
--- a/net/bridge/br_device.c
|
||||
+++ b/net/bridge/br_device.c
|
||||
@@ -525,6 +525,8 @@ void br_dev_setup(struct net_device *dev
|
||||
br->bridge_hello_time = br->hello_time = 2 * HZ;
|
||||
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
|
||||
br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME;
|
||||
+ br->offload_cache_size = 128;
|
||||
+ br->offload_cache_reserved = 8;
|
||||
dev->max_mtu = ETH_MAX_MTU;
|
||||
|
||||
br_netfilter_rtable_init(br);
|
||||
--- a/net/bridge/br_fdb.c
|
||||
+++ b/net/bridge/br_fdb.c
|
||||
@@ -23,6 +23,7 @@
|
||||
#include <net/switchdev.h>
|
||||
#include <trace/events/bridge.h>
|
||||
#include "br_private.h"
|
||||
+#include "br_private_offload.h"
|
||||
|
||||
static const struct rhashtable_params br_fdb_rht_params = {
|
||||
.head_offset = offsetof(struct net_bridge_fdb_entry, rhnode),
|
||||
@@ -185,6 +186,8 @@ static void fdb_notify(struct net_bridge
|
||||
struct sk_buff *skb;
|
||||
int err = -ENOBUFS;
|
||||
|
||||
+ br_offload_fdb_update(fdb);
|
||||
+
|
||||
if (swdev_notify)
|
||||
br_switchdev_fdb_notify(br, fdb, type);
|
||||
|
||||
@@ -393,6 +396,8 @@ static struct net_bridge_fdb_entry *fdb_
|
||||
fdb->key.vlan_id = vid;
|
||||
fdb->flags = flags;
|
||||
fdb->updated = fdb->used = jiffies;
|
||||
+ INIT_HLIST_HEAD(&fdb->offload_in);
|
||||
+ INIT_HLIST_HEAD(&fdb->offload_out);
|
||||
err = rhashtable_lookup_insert_fast(&br->fdb_hash_tbl, &fdb->rhnode,
|
||||
br_fdb_rht_params);
|
||||
if (err) {
|
||||
--- a/net/bridge/br_forward.c
|
||||
+++ b/net/bridge/br_forward.c
|
||||
@@ -16,6 +16,7 @@
|
||||
#include <linux/if_vlan.h>
|
||||
#include <linux/netfilter_bridge.h>
|
||||
#include "br_private.h"
|
||||
+#include "br_private_offload.h"
|
||||
|
||||
/* Don't forward packets to originating port or forwarding disabled */
|
||||
static inline int should_deliver(const struct net_bridge_port *p,
|
||||
@@ -32,6 +33,8 @@ static inline int should_deliver(const s
|
||||
|
||||
int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
+ br_offload_output(skb);
|
||||
+
|
||||
skb_push(skb, ETH_HLEN);
|
||||
if (!is_skb_forwardable(skb->dev, skb))
|
||||
goto drop;
|
||||
--- a/net/bridge/br_if.c
|
||||
+++ b/net/bridge/br_if.c
|
||||
@@ -25,6 +25,7 @@
|
||||
#include <net/net_namespace.h>
|
||||
|
||||
#include "br_private.h"
|
||||
+#include "br_private_offload.h"
|
||||
|
||||
/*
|
||||
* Determine initial path cost based on speed.
|
||||
@@ -437,7 +438,7 @@ static struct net_bridge_port *new_nbp(s
|
||||
p->path_cost = port_cost(dev);
|
||||
p->priority = 0x8000 >> BR_PORT_BITS;
|
||||
p->port_no = index;
|
||||
- p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
|
||||
+ p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_OFFLOAD;
|
||||
br_init_port(p);
|
||||
br_set_state(p, BR_STATE_DISABLED);
|
||||
br_stp_port_timer_init(p);
|
||||
@@ -761,6 +762,9 @@ void br_port_flags_change(struct net_bri
|
||||
|
||||
if (mask & BR_NEIGH_SUPPRESS)
|
||||
br_recalculate_neigh_suppress_enabled(br);
|
||||
+
|
||||
+ if (mask & BR_OFFLOAD)
|
||||
+ br_offload_port_state(p);
|
||||
}
|
||||
|
||||
bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
|
||||
--- a/net/bridge/br_input.c
|
||||
+++ b/net/bridge/br_input.c
|
||||
@@ -22,6 +22,7 @@
|
||||
#include <linux/rculist.h>
|
||||
#include "br_private.h"
|
||||
#include "br_private_tunnel.h"
|
||||
+#include "br_private_offload.h"
|
||||
|
||||
static int
|
||||
br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
@@ -189,6 +190,7 @@ int br_handle_frame_finish(struct net *n
|
||||
dst->used = now;
|
||||
br_forward(dst->dst, skb, local_rcv, false);
|
||||
} else {
|
||||
+ br_offload_skb_disable(skb);
|
||||
if (!mcast_hit)
|
||||
br_flood(br, skb, pkt_type, local_rcv, false);
|
||||
else
|
||||
@@ -322,6 +324,9 @@ static rx_handler_result_t br_handle_fra
|
||||
memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
|
||||
|
||||
p = br_port_get_rcu(skb->dev);
|
||||
+ if (br_offload_input(p, skb))
|
||||
+ return RX_HANDLER_CONSUMED;
|
||||
+
|
||||
if (p->flags & BR_VLAN_TUNNEL)
|
||||
br_handle_ingress_vlan_tunnel(skb, p, nbp_vlan_group_rcu(p));
|
||||
|
||||
--- /dev/null
|
||||
+++ b/net/bridge/br_offload.c
|
||||
@@ -0,0 +1,438 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/workqueue.h>
|
||||
+#include "br_private.h"
|
||||
+#include "br_private_offload.h"
|
||||
+
|
||||
+static DEFINE_SPINLOCK(offload_lock);
|
||||
+
|
||||
+struct bridge_flow_key {
|
||||
+ u8 dest[ETH_ALEN];
|
||||
+ u8 src[ETH_ALEN];
|
||||
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
|
||||
+ u16 vlan_tag;
|
||||
+ bool vlan_present;
|
||||
+#endif
|
||||
+};
|
||||
+
|
||||
+struct bridge_flow {
|
||||
+ struct net_bridge_port *port;
|
||||
+ struct rhash_head node;
|
||||
+ struct bridge_flow_key key;
|
||||
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
|
||||
+ bool vlan_out_present;
|
||||
+ u16 vlan_out;
|
||||
+#endif
|
||||
+
|
||||
+ unsigned long used;
|
||||
+ struct net_bridge_fdb_entry *fdb_in, *fdb_out;
|
||||
+ struct hlist_node fdb_list_in, fdb_list_out;
|
||||
+
|
||||
+ struct rcu_head rcu;
|
||||
+};
|
||||
+
|
||||
+static const struct rhashtable_params flow_params = {
|
||||
+ .automatic_shrinking = true,
|
||||
+ .head_offset = offsetof(struct bridge_flow, node),
|
||||
+ .key_len = sizeof(struct bridge_flow_key),
|
||||
+ .key_offset = offsetof(struct bridge_flow, key),
|
||||
+};
|
||||
+
|
||||
+static struct kmem_cache *offload_cache __read_mostly;
|
||||
+
|
||||
+static void
|
||||
+flow_rcu_free(struct rcu_head *head)
|
||||
+{
|
||||
+ struct bridge_flow *flow;
|
||||
+
|
||||
+ flow = container_of(head, struct bridge_flow, rcu);
|
||||
+ kmem_cache_free(offload_cache, flow);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+__br_offload_flow_free(struct bridge_flow *flow)
|
||||
+{
|
||||
+ flow->used = 0;
|
||||
+ hlist_del(&flow->fdb_list_in);
|
||||
+ hlist_del(&flow->fdb_list_out);
|
||||
+
|
||||
+ call_rcu(&flow->rcu, flow_rcu_free);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+br_offload_flow_free(struct bridge_flow *flow)
|
||||
+{
|
||||
+ if (rhashtable_remove_fast(&flow->port->offload.rht, &flow->node,
|
||||
+ flow_params) != 0)
|
||||
+ return;
|
||||
+
|
||||
+ __br_offload_flow_free(flow);
|
||||
+}
|
||||
+
|
||||
+static bool
|
||||
+br_offload_flow_fdb_refresh_time(struct bridge_flow *flow,
|
||||
+ struct net_bridge_fdb_entry *fdb)
|
||||
+{
|
||||
+ if (!time_after(flow->used, fdb->updated))
|
||||
+ return false;
|
||||
+
|
||||
+ fdb->updated = flow->used;
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+
|
||||
+static void
|
||||
+br_offload_flow_refresh_time(struct bridge_flow *flow)
|
||||
+{
|
||||
+ br_offload_flow_fdb_refresh_time(flow, flow->fdb_in);
|
||||
+ br_offload_flow_fdb_refresh_time(flow, flow->fdb_out);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+br_offload_destroy_cb(void *ptr, void *arg)
|
||||
+{
|
||||
+ struct bridge_flow *flow = ptr;
|
||||
+
|
||||
+ __br_offload_flow_free(flow);
|
||||
+}
|
||||
+
|
||||
+static bool
|
||||
+br_offload_need_gc(struct net_bridge_port *p)
|
||||
+{
|
||||
+ return (atomic_read(&p->offload.rht.nelems) +
|
||||
+ p->br->offload_cache_reserved) >= p->br->offload_cache_size;
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+br_offload_gc_work(struct work_struct *work)
|
||||
+{
|
||||
+ struct rhashtable_iter hti;
|
||||
+ struct net_bridge_port *p;
|
||||
+ struct bridge_flow *gc_flow = NULL;
|
||||
+ struct bridge_flow *flow;
|
||||
+ unsigned long gc_used;
|
||||
+
|
||||
+ p = container_of(work, struct net_bridge_port, offload.gc_work);
|
||||
+
|
||||
+ if (!br_offload_need_gc(p))
|
||||
+ return;
|
||||
+
|
||||
+ rhashtable_walk_enter(&p->offload.rht, &hti);
|
||||
+ rhashtable_walk_start(&hti);
|
||||
+ while ((flow = rhashtable_walk_next(&hti)) != NULL) {
|
||||
+ unsigned long used;
|
||||
+
|
||||
+ if (IS_ERR(flow))
|
||||
+ continue;
|
||||
+
|
||||
+ used = READ_ONCE(flow->used);
|
||||
+ if (!used)
|
||||
+ continue;
|
||||
+
|
||||
+ if (gc_flow && !time_before(used, gc_used))
|
||||
+ continue;
|
||||
+
|
||||
+ gc_flow = flow;
|
||||
+ gc_used = used;
|
||||
+ }
|
||||
+ rhashtable_walk_stop(&hti);
|
||||
+ rhashtable_walk_exit(&hti);
|
||||
+
|
||||
+ if (!gc_flow)
|
||||
+ return;
|
||||
+
|
||||
+ spin_lock_bh(&offload_lock);
|
||||
+ if (br_offload_need_gc(p) && gc_flow &&
|
||||
+ gc_flow->used == gc_used)
|
||||
+ br_offload_flow_free(gc_flow);
|
||||
+ if (p->offload.enabled && br_offload_need_gc(p))
|
||||
+ queue_work(system_long_wq, work);
|
||||
+ spin_unlock_bh(&offload_lock);
|
||||
+
|
||||
+}
|
||||
+
|
||||
+void br_offload_port_state(struct net_bridge_port *p)
|
||||
+{
|
||||
+ struct net_bridge_port_offload *o = &p->offload;
|
||||
+ bool enabled = true;
|
||||
+ bool flush = false;
|
||||
+
|
||||
+ if (p->state != BR_STATE_FORWARDING ||
|
||||
+ !(p->flags & BR_OFFLOAD))
|
||||
+ enabled = false;
|
||||
+
|
||||
+ spin_lock_bh(&offload_lock);
|
||||
+ if (o->enabled == enabled)
|
||||
+ goto out;
|
||||
+
|
||||
+ if (enabled) {
|
||||
+ if (!o->gc_work.func)
|
||||
+ INIT_WORK(&o->gc_work, br_offload_gc_work);
|
||||
+ rhashtable_init(&o->rht, &flow_params);
|
||||
+ } else {
|
||||
+ flush = true;
|
||||
+ rhashtable_free_and_destroy(&o->rht, br_offload_destroy_cb, o);
|
||||
+ }
|
||||
+
|
||||
+ o->enabled = enabled;
|
||||
+
|
||||
+out:
|
||||
+ spin_unlock_bh(&offload_lock);
|
||||
+
|
||||
+ if (flush)
|
||||
+ flush_work(&o->gc_work);
|
||||
+}
|
||||
+
|
||||
+void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb)
|
||||
+{
|
||||
+ struct bridge_flow *f;
|
||||
+ struct hlist_node *tmp;
|
||||
+
|
||||
+ spin_lock_bh(&offload_lock);
|
||||
+
|
||||
+ hlist_for_each_entry_safe(f, tmp, &fdb->offload_in, fdb_list_in)
|
||||
+ br_offload_flow_free(f);
|
||||
+
|
||||
+ hlist_for_each_entry_safe(f, tmp, &fdb->offload_out, fdb_list_out)
|
||||
+ br_offload_flow_free(f);
|
||||
+
|
||||
+ spin_unlock_bh(&offload_lock);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+br_offload_prepare_key(struct net_bridge_port *p, struct bridge_flow_key *key,
|
||||
+ struct sk_buff *skb)
|
||||
+{
|
||||
+ memset(key, 0, sizeof(*key));
|
||||
+ memcpy(key, eth_hdr(skb), 2 * ETH_ALEN);
|
||||
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
|
||||
+ if (!br_opt_get(p->br, BROPT_VLAN_ENABLED))
|
||||
+ return;
|
||||
+
|
||||
+ if (!skb_vlan_tag_present(skb) || skb->vlan_proto != p->br->vlan_proto)
|
||||
+ return;
|
||||
+
|
||||
+ key->vlan_present = true;
|
||||
+ key->vlan_tag = skb_vlan_tag_get_id(skb);
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+void br_offload_output(struct sk_buff *skb)
|
||||
+{
|
||||
+ struct net_bridge_port_offload *o;
|
||||
+ struct br_input_skb_cb *cb = (struct br_input_skb_cb *)skb->cb;
|
||||
+ struct net_bridge_port *p, *inp;
|
||||
+ struct net_device *dev;
|
||||
+ struct net_bridge_fdb_entry *fdb_in, *fdb_out;
|
||||
+ struct net_bridge_vlan_group *vg;
|
||||
+ struct bridge_flow_key key;
|
||||
+ struct bridge_flow *flow;
|
||||
+ u16 vlan;
|
||||
+
|
||||
+ if (!cb->offload)
|
||||
+ return;
|
||||
+
|
||||
+ rcu_read_lock();
|
||||
+
|
||||
+ p = br_port_get_rcu(skb->dev);
|
||||
+ if (!p)
|
||||
+ goto out;
|
||||
+
|
||||
+ o = &p->offload;
|
||||
+ if (!o->enabled)
|
||||
+ goto out;
|
||||
+
|
||||
+ if (atomic_read(&p->offload.rht.nelems) >= p->br->offload_cache_size)
|
||||
+ goto out;
|
||||
+
|
||||
+ dev = dev_get_by_index_rcu(dev_net(p->br->dev), cb->input_ifindex);
|
||||
+ if (!dev)
|
||||
+ goto out;
|
||||
+
|
||||
+ inp = br_port_get_rcu(dev);
|
||||
+ if (!inp)
|
||||
+ goto out;
|
||||
+
|
||||
+ vg = nbp_vlan_group_rcu(inp);
|
||||
+ vlan = cb->input_vlan_present ? cb->input_vlan_tag : br_get_pvid(vg);
|
||||
+ fdb_in = br_fdb_find_rcu(p->br, eth_hdr(skb)->h_source, vlan);
|
||||
+ if (!fdb_in || !fdb_in->dst)
|
||||
+ goto out;
|
||||
+
|
||||
+ vg = nbp_vlan_group_rcu(p);
|
||||
+ vlan = skb_vlan_tag_present(skb) ? skb_vlan_tag_get_id(skb) : br_get_pvid(vg);
|
||||
+ fdb_out = br_fdb_find_rcu(p->br, eth_hdr(skb)->h_dest, vlan);
|
||||
+ if (!fdb_out || !fdb_out->dst)
|
||||
+ goto out;
|
||||
+
|
||||
+ br_offload_prepare_key(p, &key, skb);
|
||||
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
|
||||
+ key.vlan_present = cb->input_vlan_present;
|
||||
+ key.vlan_tag = cb->input_vlan_tag;
|
||||
+#endif
|
||||
+
|
||||
+ flow = kmem_cache_alloc(offload_cache, GFP_ATOMIC);
|
||||
+ flow->port = inp;
|
||||
+ memcpy(&flow->key, &key, sizeof(key));
|
||||
+
|
||||
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
|
||||
+ flow->vlan_out_present = skb_vlan_tag_present(skb);
|
||||
+ flow->vlan_out = skb_vlan_tag_get(skb);
|
||||
+#endif
|
||||
+
|
||||
+ flow->fdb_in = fdb_in;
|
||||
+ flow->fdb_out = fdb_out;
|
||||
+ flow->used = jiffies;
|
||||
+
|
||||
+ spin_lock_bh(&offload_lock);
|
||||
+ if (!o->enabled ||
|
||||
+ atomic_read(&p->offload.rht.nelems) >= p->br->offload_cache_size ||
|
||||
+ rhashtable_insert_fast(&inp->offload.rht, &flow->node, flow_params)) {
|
||||
+ kmem_cache_free(offload_cache, flow);
|
||||
+ goto out_unlock;
|
||||
+ }
|
||||
+
|
||||
+ hlist_add_head(&flow->fdb_list_in, &fdb_in->offload_in);
|
||||
+ hlist_add_head(&flow->fdb_list_out, &fdb_out->offload_out);
|
||||
+
|
||||
+ if (br_offload_need_gc(p))
|
||||
+ queue_work(system_long_wq, &p->offload.gc_work);
|
||||
+
|
||||
+out_unlock:
|
||||
+ spin_unlock_bh(&offload_lock);
|
||||
+
|
||||
+out:
|
||||
+ rcu_read_unlock();
|
||||
+}
|
||||
+
|
||||
+bool br_offload_input(struct net_bridge_port *p, struct sk_buff *skb)
|
||||
+{
|
||||
+ struct net_bridge_port_offload *o = &p->offload;
|
||||
+ struct br_input_skb_cb *cb = (struct br_input_skb_cb *)skb->cb;
|
||||
+ struct bridge_flow_key key;
|
||||
+ struct net_bridge_port *dst;
|
||||
+ struct bridge_flow *flow;
|
||||
+ unsigned long now = jiffies;
|
||||
+ bool ret = false;
|
||||
+
|
||||
+ if (skb->len < sizeof(key))
|
||||
+ return false;
|
||||
+
|
||||
+ if (!o->enabled)
|
||||
+ return false;
|
||||
+
|
||||
+ if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
|
||||
+ return false;
|
||||
+
|
||||
+ br_offload_prepare_key(p, &key, skb);
|
||||
+
|
||||
+ rcu_read_lock();
|
||||
+ flow = rhashtable_lookup(&o->rht, &key, flow_params);
|
||||
+ if (!flow) {
|
||||
+ cb->offload = 1;
|
||||
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
|
||||
+ cb->input_vlan_present = key.vlan_present != 0;
|
||||
+ cb->input_vlan_tag = key.vlan_tag;
|
||||
+#endif
|
||||
+ cb->input_ifindex = p->dev->ifindex;
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ if (flow->fdb_in->dst != p)
|
||||
+ goto out;
|
||||
+
|
||||
+ dst = flow->fdb_out->dst;
|
||||
+ if (!dst)
|
||||
+ goto out;
|
||||
+
|
||||
+ ret = true;
|
||||
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
|
||||
+ if (!flow->vlan_out_present && key.vlan_present) {
|
||||
+ __vlan_hwaccel_clear_tag(skb);
|
||||
+ } else if (flow->vlan_out_present) {
|
||||
+ if (skb_vlan_tag_present(skb) &&
|
||||
+ skb->vlan_proto != p->br->vlan_proto) {
|
||||
+ /* Protocol-mismatch, empty out vlan_tci for new tag */
|
||||
+ skb_push(skb, ETH_HLEN);
|
||||
+ skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
|
||||
+ skb_vlan_tag_get(skb));
|
||||
+ if (unlikely(!skb))
|
||||
+ goto out;
|
||||
+
|
||||
+ skb_pull(skb, ETH_HLEN);
|
||||
+ skb_reset_mac_len(skb);
|
||||
+ }
|
||||
+
|
||||
+ __vlan_hwaccel_put_tag(skb, p->br->vlan_proto,
|
||||
+ flow->vlan_out);
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
+ skb->dev = dst->dev;
|
||||
+ skb_push(skb, ETH_HLEN);
|
||||
+
|
||||
+ if (skb_warn_if_lro(skb) || !is_skb_forwardable(skb->dev, skb)) {
|
||||
+ kfree_skb(skb);
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ if (now - flow->used >= HZ) {
|
||||
+ flow->used = now;
|
||||
+ br_offload_flow_refresh_time(flow);
|
||||
+ }
|
||||
+
|
||||
+ skb_forward_csum(skb);
|
||||
+ dev_queue_xmit(skb);
|
||||
+
|
||||
+out:
|
||||
+ rcu_read_unlock();
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+br_offload_check_gc(struct net_bridge *br)
|
||||
+{
|
||||
+ struct net_bridge_port *p;
|
||||
+
|
||||
+ spin_lock_bh(&br->lock);
|
||||
+ list_for_each_entry(p, &br->port_list, list)
|
||||
+ if (br_offload_need_gc(p))
|
||||
+ queue_work(system_long_wq, &p->offload.gc_work);
|
||||
+ spin_unlock_bh(&br->lock);
|
||||
+}
|
||||
+
|
||||
+
|
||||
+int br_offload_set_cache_size(struct net_bridge *br, unsigned long val,
|
||||
+ struct netlink_ext_ack *extack)
|
||||
+{
|
||||
+ br->offload_cache_size = val;
|
||||
+ br_offload_check_gc(br);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+int br_offload_set_cache_reserved(struct net_bridge *br, unsigned long val,
|
||||
+ struct netlink_ext_ack *extack)
|
||||
+{
|
||||
+ br->offload_cache_reserved = val;
|
||||
+ br_offload_check_gc(br);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+int __init br_offload_init(void)
|
||||
+{
|
||||
+ offload_cache = kmem_cache_create("bridge_offload_cache",
|
||||
+ sizeof(struct bridge_flow),
|
||||
+ 0, SLAB_HWCACHE_ALIGN, NULL);
|
||||
+ if (!offload_cache)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+void br_offload_fini(void)
|
||||
+{
|
||||
+ kmem_cache_destroy(offload_cache);
|
||||
+}
|
||||
--- a/net/bridge/br_private.h
|
||||
+++ b/net/bridge/br_private.h
|
||||
@@ -271,7 +271,13 @@ struct net_bridge_fdb_entry {
|
||||
unsigned long updated ____cacheline_aligned_in_smp;
|
||||
unsigned long used;
|
||||
|
||||
- struct rcu_head rcu;
|
||||
+ union {
|
||||
+ struct {
|
||||
+ struct hlist_head offload_in;
|
||||
+ struct hlist_head offload_out;
|
||||
+ };
|
||||
+ struct rcu_head rcu;
|
||||
+ };
|
||||
};
|
||||
|
||||
struct net_bridge_fdb_flush_desc {
|
||||
@@ -353,6 +359,12 @@ struct net_bridge_mdb_entry {
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
+/*
+ * Offload state embedded in every struct net_bridge_port (as ->offload).
+ */
+struct net_bridge_port_offload {
|
||||
+ /* NOTE(review): presumably the table of cached flows for this port - confirm in br_offload.c */
+ struct rhashtable rht;
|
||||
+ /* GC work item; br_offload_check_gc() queues it on system_long_wq */
+ struct work_struct gc_work;
|
||||
+ /* NOTE(review): presumably whether offloading is active on this port - confirm */
+ bool enabled;
|
||||
+};
|
||||
+
|
||||
struct net_bridge_port {
|
||||
struct net_bridge *br;
|
||||
struct net_device *dev;
|
||||
@@ -414,6 +426,7 @@ struct net_bridge_port {
|
||||
u16 backup_redirected_cnt;
|
||||
|
||||
struct bridge_stp_xstats stp_xstats;
|
||||
+ struct net_bridge_port_offload offload;
|
||||
};
|
||||
|
||||
#define kobj_to_brport(obj) container_of(obj, struct net_bridge_port, kobj)
|
||||
@@ -531,6 +544,9 @@ struct net_bridge {
|
||||
struct kobject *ifobj;
|
||||
u32 auto_cnt;
|
||||
|
||||
+ u32 offload_cache_size;
|
||||
+ u32 offload_cache_reserved;
|
||||
+
|
||||
#ifdef CONFIG_NET_SWITCHDEV
|
||||
/* Counter used to make sure that hardware domains get unique
|
||||
* identifiers in case a bridge spans multiple switchdev instances.
|
||||
@@ -565,6 +581,10 @@ struct br_input_skb_cb {
|
||||
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
|
||||
u8 br_netfilter_broute:1;
|
||||
#endif
|
||||
+ u8 offload:1;
|
||||
+ u8 input_vlan_present:1;
|
||||
+ u16 input_vlan_tag;
|
||||
+ int input_ifindex;
|
||||
|
||||
#ifdef CONFIG_NET_SWITCHDEV
|
||||
/* Set if TX data plane offloading is used towards at least one
|
||||
--- /dev/null
|
||||
+++ b/net/bridge/br_private_offload.h
|
||||
@@ -0,0 +1,23 @@
|
||||
+#ifndef __BR_OFFLOAD_H
|
||||
+#define __BR_OFFLOAD_H
|
||||
+
|
||||
+bool br_offload_input(struct net_bridge_port *p, struct sk_buff *skb);
|
||||
+void br_offload_output(struct sk_buff *skb);
|
||||
+void br_offload_port_state(struct net_bridge_port *p);
|
||||
+void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb);
|
||||
+int br_offload_init(void);
|
||||
+void br_offload_fini(void);
|
||||
+int br_offload_set_cache_size(struct net_bridge *br, unsigned long val,
|
||||
+ struct netlink_ext_ack *extack);
|
||||
+int br_offload_set_cache_reserved(struct net_bridge *br, unsigned long val,
|
||||
+ struct netlink_ext_ack *extack);
|
||||
+
|
||||
+/*
+ * Clear the offload flag kept in the bridge input control block of @skb.
+ * The flag is only written when it is currently set.
+ */
+static inline void br_offload_skb_disable(struct sk_buff *skb)
+{
+	struct br_input_skb_cb *input_cb = (struct br_input_skb_cb *)skb->cb;
+
+	if (!input_cb->offload)
+		return;
+
+	input_cb->offload = 0;
+}
|
||||
+
|
||||
+#endif
|
||||
--- a/net/bridge/br_stp.c
|
||||
+++ b/net/bridge/br_stp.c
|
||||
@@ -12,6 +12,7 @@
|
||||
|
||||
#include "br_private.h"
|
||||
#include "br_private_stp.h"
|
||||
+#include "br_private_offload.h"
|
||||
|
||||
/* since time values in bpdu are in jiffies and then scaled (1/256)
|
||||
* before sending, make sure that is at least one STP tick.
|
||||
@@ -58,6 +59,8 @@ void br_set_state(struct net_bridge_port
|
||||
(unsigned int) p->port_no, p->dev->name,
|
||||
br_port_state_names[p->state]);
|
||||
|
||||
+ br_offload_port_state(p);
|
||||
+
|
||||
if (p->br->stp_enabled == BR_KERNEL_STP) {
|
||||
switch (p->state) {
|
||||
case BR_STATE_BLOCKING:
|
||||
--- a/net/bridge/br_sysfs_br.c
|
||||
+++ b/net/bridge/br_sysfs_br.c
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <linux/sched/signal.h>
|
||||
|
||||
#include "br_private.h"
|
||||
+#include "br_private_offload.h"
|
||||
|
||||
/* IMPORTANT: new bridge options must be added with netlink support only
|
||||
* please do not add new sysfs entries
|
||||
@@ -933,6 +934,38 @@ static ssize_t vlan_stats_per_port_store
|
||||
static DEVICE_ATTR_RW(vlan_stats_per_port);
|
||||
#endif
|
||||
|
||||
+/* sysfs read handler: print the bridge's offload_cache_size as "%u\n". */
+static ssize_t offload_cache_size_show(struct device *d,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	return sprintf(buf, "%u\n", to_bridge(d)->offload_cache_size);
+}
+
+/*
+ * sysfs write handler: pass the user buffer to store_bridge_parm() with
+ * br_offload_set_cache_size() as the setter callback.
+ */
+static ssize_t offload_cache_size_store(struct device *d,
+					struct device_attribute *attr,
+					const char *buf, size_t len)
+{
+	return store_bridge_parm(d, buf, len, br_offload_set_cache_size);
+}
+static DEVICE_ATTR_RW(offload_cache_size);
|
||||
+
|
||||
+/* sysfs read handler: print the bridge's offload_cache_reserved as "%u\n". */
+static ssize_t offload_cache_reserved_show(struct device *d,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	return sprintf(buf, "%u\n", to_bridge(d)->offload_cache_reserved);
+}
+
+/*
+ * sysfs write handler: pass the user buffer to store_bridge_parm() with
+ * br_offload_set_cache_reserved() as the setter callback.
+ */
+static ssize_t offload_cache_reserved_store(struct device *d,
+					    struct device_attribute *attr,
+					    const char *buf, size_t len)
+{
+	return store_bridge_parm(d, buf, len, br_offload_set_cache_reserved);
+}
+static DEVICE_ATTR_RW(offload_cache_reserved);
|
||||
+
|
||||
static struct attribute *bridge_attrs[] = {
|
||||
&dev_attr_forward_delay.attr,
|
||||
&dev_attr_hello_time.attr,
|
||||
@@ -987,6 +1020,8 @@ static struct attribute *bridge_attrs[]
|
||||
&dev_attr_vlan_stats_enabled.attr,
|
||||
&dev_attr_vlan_stats_per_port.attr,
|
||||
#endif
|
||||
+ &dev_attr_offload_cache_size.attr,
|
||||
+ &dev_attr_offload_cache_reserved.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
--- a/net/bridge/br_sysfs_if.c
|
||||
+++ b/net/bridge/br_sysfs_if.c
|
||||
@@ -241,6 +241,7 @@ BRPORT_ATTR_FLAG(broadcast_flood, BR_BCA
|
||||
BRPORT_ATTR_FLAG(neigh_suppress, BR_NEIGH_SUPPRESS);
|
||||
BRPORT_ATTR_FLAG(isolated, BR_ISOLATED);
|
||||
BRPORT_ATTR_FLAG(bpdu_filter, BR_BPDU_FILTER);
|
||||
+BRPORT_ATTR_FLAG(offload, BR_OFFLOAD);
|
||||
|
||||
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
|
||||
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
|
||||
@@ -295,6 +296,7 @@ static const struct brport_attribute *br
|
||||
&brport_attr_isolated,
|
||||
&brport_attr_bpdu_filter,
|
||||
&brport_attr_backup_port,
|
||||
+ &brport_attr_offload,
|
||||
NULL
|
||||
};
|
||||
|
||||
--- a/net/bridge/br_vlan_tunnel.c
|
||||
+++ b/net/bridge/br_vlan_tunnel.c
|
||||
@@ -15,6 +15,7 @@
|
||||
|
||||
#include "br_private.h"
|
||||
#include "br_private_tunnel.h"
|
||||
+#include "br_private_offload.h"
|
||||
|
||||
static inline int br_vlan_tunid_cmp(struct rhashtable_compare_arg *arg,
|
||||
const void *ptr)
|
||||
@@ -180,6 +181,7 @@ void br_handle_ingress_vlan_tunnel(struc
|
||||
skb_dst_drop(skb);
|
||||
|
||||
__vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan->vid);
|
||||
+ br_offload_skb_disable(skb);
|
||||
}
|
||||
|
||||
int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
|
||||
@@ -201,6 +203,7 @@ int br_handle_egress_vlan_tunnel(struct
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
+ br_offload_skb_disable(skb);
|
||||
tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
|
||||
if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
|
||||
skb_dst_set(skb, &tunnel_dst->dst);
|
@ -0,0 +1,112 @@
|
||||
From: Yousong Zhou <yszhou4tech@gmail.com>
|
||||
Subject: [PATCH] ath79: add nvmem cell mac-address-ascii support
|
||||
|
||||
This is needed for devices with mac address stored in ascii format, e.g.
|
||||
HiWiFi HC6361 to be ported in the following patch.
|
||||
|
||||
Submitted-by: Yousong Zhou <yszhou4tech@gmail.com>
|
||||
---
|
||||
net/ethernet/eth.c | 83 ++++++++++++------
|
||||
 1 file changed, 72 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/net/ethernet/eth.c
|
||||
+++ b/net/ethernet/eth.c
|
||||
@@ -531,6 +531,63 @@ int eth_platform_get_mac_address(struct
|
||||
}
|
||||
EXPORT_SYMBOL(eth_platform_get_mac_address);
|
||||
|
||||
+/*
+ * Read a binary MAC address from @cell.
+ *
+ * Returns the buffer handed out by nvmem_cell_read() when it is exactly
+ * ETH_ALEN bytes long; the caller releases it with kfree(). On failure an
+ * ERR_PTR() is returned: the error from nvmem_cell_read() itself, or
+ * -EINVAL when the cell has the wrong length.
+ */
+static void *nvmem_cell_get_mac_address(struct nvmem_cell *cell)
+{
+	size_t len;
+	void *mac;
+
+	mac = nvmem_cell_read(cell, &len);
+	/*
+	 * mac already is an ERR_PTR() here. PTR_ERR() would turn it into a
+	 * long, which is not a valid return value for a void * function.
+	 */
+	if (IS_ERR(mac))
+		return mac;
+	if (len != ETH_ALEN) {
+		kfree(mac);
+		return ERR_PTR(-EINVAL);
+	}
+	return mac;
+}
|
||||
+
|
||||
+/*
+ * Read a MAC address stored as ASCII text ("xx:xx:xx:xx:xx:xx") in @cell
+ * and convert it to its ETH_ALEN-byte binary form.
+ *
+ * Returns a kmalloc()ed ETH_ALEN-byte buffer that the caller releases
+ * with kfree(). On failure an ERR_PTR() is returned: the error from
+ * nvmem_cell_read(), -EINVAL when the cell is not exactly
+ * ETH_ALEN * 2 + 5 bytes long or does not parse into six octets, or
+ * -ENOMEM when the result buffer cannot be allocated.
+ */
+static void *nvmem_cell_get_mac_address_ascii(struct nvmem_cell *cell)
+{
+	size_t len;
+	int ret;
+	void *mac_ascii;
+	u8 *mac;
+
+	mac_ascii = nvmem_cell_read(cell, &len);
+	/*
+	 * Hand the error pointer on unchanged. PTR_ERR() would yield a long,
+	 * which is not a valid return value for a void * function.
+	 */
+	if (IS_ERR(mac_ascii))
+		return ERR_CAST(mac_ascii);
+	/* Six two-digit octets plus five ':' separators. */
+	if (len != ETH_ALEN*2+5) {
+		kfree(mac_ascii);
+		return ERR_PTR(-EINVAL);
+	}
+	mac = kmalloc(ETH_ALEN, GFP_KERNEL);
+	if (!mac) {
+		kfree(mac_ascii);
+		return ERR_PTR(-ENOMEM);
+	}
+	ret = sscanf(mac_ascii, "%2hhx:%2hhx:%2hhx:%2hhx:%2hhx:%2hhx",
+		     &mac[0], &mac[1], &mac[2],
+		     &mac[3], &mac[4], &mac[5]);
+	kfree(mac_ascii);
+	if (ret == ETH_ALEN)
+		return mac;
+	kfree(mac);
+	return ERR_PTR(-EINVAL);
+}
|
||||
+
|
||||
+/*
+ * Pairs an nvmem cell name with the reader that decodes a MAC address
+ * from a cell of that name.
+ */
+struct nvmem_cell_mac_address_property {
+	char *name;
+	void *(*read)(struct nvmem_cell *);
+};
+
+/* Cell names that nvmem_get_mac_address() looks up, in array order. */
+static struct nvmem_cell_mac_address_property
+nvmem_cell_mac_address_properties[] = {
+	{ .name = "mac-address", .read = nvmem_cell_get_mac_address },
+	{
+		.name = "mac-address-ascii",
+		.read = nvmem_cell_get_mac_address_ascii,
+	},
+};
|
||||
+
|
||||
/**
|
||||
* platform_get_ethdev_address - Set netdev's MAC address from a given device
|
||||
* @dev: Pointer to the device
|
||||
@@ -564,19 +621,23 @@ int nvmem_get_mac_address(struct device
|
||||
{
|
||||
struct nvmem_cell *cell;
|
||||
const void *mac;
|
||||
- size_t len;
|
||||
+ struct nvmem_cell_mac_address_property *property;
|
||||
+ int i;
|
||||
|
||||
- cell = nvmem_cell_get(dev, "mac-address");
|
||||
- if (IS_ERR(cell))
|
||||
- return PTR_ERR(cell);
|
||||
-
|
||||
- mac = nvmem_cell_read(cell, &len);
|
||||
- nvmem_cell_put(cell);
|
||||
-
|
||||
- if (IS_ERR(mac))
|
||||
- return PTR_ERR(mac);
|
||||
+	for (i = 0; i < ARRAY_SIZE(nvmem_cell_mac_address_properties); i++) {
+		property = &nvmem_cell_mac_address_properties[i];
+		cell = nvmem_cell_get(dev, property->name);
+		if (IS_ERR(cell)) {
+			/* Report the lookup error only after the last name failed. */
+			if (i == ARRAY_SIZE(nvmem_cell_mac_address_properties) - 1)
+				return PTR_ERR(cell);
+			continue;
+		}
+		mac = property->read(cell);
+		nvmem_cell_put(cell);
+		break;
+	}
+
+	/*
+	 * The readers return ERR_PTR() on failure. Bail out here so the error
+	 * pointer is never dereferenced by is_valid_ether_addr() or handed to
+	 * kfree() below.
+	 */
+	if (IS_ERR(mac))
+		return PTR_ERR(mac);
|
||||
|
||||
- if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
|
||||
+ if (!is_valid_ether_addr(mac)) {
|
||||
kfree(mac);
|
||||
return -EINVAL;
|
||||
}
|
@ -83,7 +83,7 @@ Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
|
||||
--- a/include/uapi/linux/netfilter/xt_connmark.h
|
||||
+++ b/include/uapi/linux/netfilter/xt_connmark.h
|
||||
@@ -20,6 +20,11 @@ enum {
|
||||
@@ -15,6 +15,11 @@ enum {
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -95,7 +95,7 @@ Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
D_SHIFT_LEFT = 0,
|
||||
D_SHIFT_RIGHT,
|
||||
};
|
||||
@@ -34,6 +39,11 @@ struct xt_connmark_tginfo2 {
|
||||
@@ -29,6 +34,11 @@ struct xt_connmark_tginfo2 {
|
||||
__u8 shift_dir, shift_bits, mode;
|
||||
};
|
||||
|
||||
@ -109,7 +109,7 @@ Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
__u8 invert;
|
||||
--- a/net/netfilter/xt_connmark.c
|
||||
+++ b/net/netfilter/xt_connmark.c
|
||||
@@ -24,12 +24,13 @@ MODULE_ALIAS("ipt_connmark");
|
||||
@@ -24,13 +24,14 @@ MODULE_ALIAS("ipt_connmark");
|
||||
MODULE_ALIAS("ip6t_connmark");
|
||||
|
||||
static unsigned int
|
||||
@ -120,15 +120,16 @@ Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
u_int32_t new_targetmark;
|
||||
struct nf_conn *ct;
|
||||
u_int32_t newmark;
|
||||
u_int32_t oldmark;
|
||||
+ u_int8_t dscp;
|
||||
|
||||
ct = nf_ct_get(skb, &ctinfo);
|
||||
if (ct == NULL)
|
||||
@@ -37,12 +38,24 @@ connmark_tg_shift(struct sk_buff *skb, c
|
||||
|
||||
@@ -39,12 +40,24 @@ connmark_tg_shift(struct sk_buff *skb, c
|
||||
switch (info->mode) {
|
||||
case XT_CONNMARK_SET:
|
||||
- newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
|
||||
oldmark = READ_ONCE(ct->mark);
|
||||
- newmark = (oldmark & ~info->ctmask) ^ info->ctmark;
|
||||
- if (info->shift_dir == D_SHIFT_RIGHT)
|
||||
- newmark >>= info->shift_bits;
|
||||
- else
|
||||
@ -151,10 +152,10 @@ Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
+ newmark = (newmark & ~info->ctmark) |
|
||||
+ (info->ctmask | (dscp << info->shift_bits));
|
||||
+ }
|
||||
if (ct->mark != newmark) {
|
||||
ct->mark = newmark;
|
||||
if (READ_ONCE(ct->mark) != newmark) {
|
||||
WRITE_ONCE(ct->mark, newmark);
|
||||
nf_conntrack_event_cache(IPCT_MARK, ct);
|
||||
@@ -81,20 +94,36 @@ static unsigned int
|
||||
@@ -83,20 +96,36 @@ static unsigned int
|
||||
connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
|
||||
{
|
||||
const struct xt_connmark_tginfo1 *info = par->targinfo;
|
||||
@ -193,7 +194,7 @@ Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
|
||||
return connmark_tg_shift(skb, info);
|
||||
}
|
||||
@@ -165,6 +194,16 @@ static struct xt_target connmark_tg_reg[
|
||||
@@ -167,6 +196,16 @@ static struct xt_target connmark_tg_reg[
|
||||
.targetsize = sizeof(struct xt_connmark_tginfo2),
|
||||
.destroy = connmark_tg_destroy,
|
||||
.me = THIS_MODULE,
|
@ -8,30 +8,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
|
||||
--- a/net/netfilter/Kconfig
|
||||
+++ b/net/netfilter/Kconfig
|
||||
@@ -712,8 +712,6 @@ config NFT_REJECT_NETDEV
|
||||
|
||||
endif # NF_TABLES_NETDEV
|
||||
|
||||
-endif # NF_TABLES
|
||||
-
|
||||
config NF_FLOW_TABLE_INET
|
||||
tristate "Netfilter flow table mixed IPv4/IPv6 module"
|
||||
depends on NF_FLOW_TABLE
|
||||
@@ -722,11 +720,12 @@ config NF_FLOW_TABLE_INET
|
||||
|
||||
To compile it as a module, choose M here.
|
||||
|
||||
+endif # NF_TABLES
|
||||
+
|
||||
config NF_FLOW_TABLE
|
||||
tristate "Netfilter flow table module"
|
||||
depends on NETFILTER_INGRESS
|
||||
depends on NF_CONNTRACK
|
||||
- depends on NF_TABLES
|
||||
help
|
||||
This option adds the flow table core infrastructure.
|
||||
|
||||
@@ -1023,6 +1022,15 @@ config NETFILTER_XT_TARGET_NOTRACK
|
||||
@@ -1023,6 +1023,15 @@ config NETFILTER_XT_TARGET_NOTRACK
|
||||
depends on NETFILTER_ADVANCED
|
||||
select NETFILTER_XT_TARGET_CT
|
||||
|
||||
@ -49,7 +26,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
depends on NETFILTER_ADVANCED
|
||||
--- a/net/netfilter/Makefile
|
||||
+++ b/net/netfilter/Makefile
|
||||
@@ -148,6 +148,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIF
|
||||
@@ -154,6 +154,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIF
|
||||
obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
|
||||
obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
|
||||
obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
|
||||
@ -59,7 +36,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
|
||||
--- /dev/null
|
||||
+++ b/net/netfilter/xt_FLOWOFFLOAD.c
|
||||
@@ -0,0 +1,694 @@
|
||||
@@ -0,0 +1,697 @@
|
||||
+/*
|
||||
+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
|
||||
+ *
|
||||
@ -250,13 +227,16 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
|
||||
+xt_flowoffload_check_hook(struct nf_flowtable *flowtable,
|
||||
+ struct flow_offload *flow, void *data)
|
||||
+{
|
||||
+ struct xt_flowoffload_table *table = data;
|
||||
+ struct xt_flowoffload_table *table;
|
||||
+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
|
||||
+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
|
||||
+ struct xt_flowoffload_hook *hook;
|
||||
+
|
||||
+ table = container_of(flowtable, struct xt_flowoffload_table, ft);
|
||||
+
|
||||
+ spin_lock_bh(&hooks_lock);
|
||||
+ hlist_for_each_entry(hook, &table->hooks, list) {
|
||||
+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
|
||||
@ -283,8 +263,8 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
+ hook->used = false;
|
||||
+ spin_unlock_bh(&hooks_lock);
|
||||
+
|
||||
+
|
||||
+
|
||||
+ err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
|
||||
+ NULL);
|
||||
+ if (err && err != -EAGAIN)
|
||||
+ goto out;
|
||||
+
|
||||
@ -754,6 +734,34 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
+MODULE_LICENSE("GPL");
|
||||
+module_init(xt_flowoffload_tg_init);
|
||||
+module_exit(xt_flowoffload_tg_exit);
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -7,7 +7,6 @@
|
||||
#include <linux/netdevice.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/ip6_route.h>
|
||||
-#include <net/netfilter/nf_tables.h>
|
||||
#include <net/netfilter/nf_flow_table.h>
|
||||
#include <net/netfilter/nf_conntrack.h>
|
||||
#include <net/netfilter/nf_conntrack_core.h>
|
||||
@@ -381,8 +380,7 @@ flow_offload_lookup(struct nf_flowtable
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(flow_offload_lookup);
|
||||
|
||||
-static int
|
||||
-nf_flow_table_iterate(struct nf_flowtable *flow_table,
|
||||
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
|
||||
void (*iter)(struct nf_flowtable *flowtable,
|
||||
struct flow_offload *flow, void *data),
|
||||
void *data)
|
||||
@@ -436,6 +434,7 @@ static void nf_flow_offload_gc_step(stru
|
||||
nf_flow_offload_stats(flow_table, flow);
|
||||
}
|
||||
}
|
||||
+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
|
||||
|
||||
void nf_flow_table_gc_run(struct nf_flowtable *flow_table)
|
||||
{
|
||||
--- /dev/null
|
||||
+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
|
||||
@@ -0,0 +1,17 @@
|
||||
@ -774,3 +782,17 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
+};
|
||||
+
|
||||
+#endif /* _XT_FLOWOFFLOAD_H */
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -280,6 +280,11 @@ void nf_flow_table_free(struct nf_flowta
|
||||
|
||||
void flow_offload_teardown(struct flow_offload *flow);
|
||||
|
||||
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
|
||||
+ void (*iter)(struct nf_flowtable *flowtable,
|
||||
+ struct flow_offload *flow, void *data),
|
||||
+ void *data);
|
||||
+
|
||||
void nf_flow_snat_port(const struct flow_offload *flow,
|
||||
struct sk_buff *skb, unsigned int thoff,
|
||||
u8 protocol, enum flow_offload_tuple_dir dir);
|
@ -11,7 +11,7 @@ Signed-off-by: Imre Kaloz <kaloz@openwrt.org>
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -149,8 +149,8 @@ static inline bool dev_xmit_complete(int
|
||||
@@ -150,8 +150,8 @@ static inline bool dev_xmit_complete(int
|
||||
|
||||
#if defined(CONFIG_HYPERV_NET)
|
||||
# define LL_MAX_HEADER 128
|
@ -13,7 +13,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
|
||||
--- a/net/sched/sch_fq_codel.c
|
||||
+++ b/net/sched/sch_fq_codel.c
|
||||
@@ -474,7 +474,11 @@ static int fq_codel_init(struct Qdisc *s
|
||||
@@ -471,7 +471,11 @@ static int fq_codel_init(struct Qdisc *s
|
||||
|
||||
sch->limit = 10*1024;
|
||||
q->flows_cnt = 1024;
|
@ -0,0 +1,25 @@
|
||||
From 804fbb3f2ec9283f7b778e057a68bfff440a0be6 Mon Sep 17 00:00:00 2001
|
||||
From: Rui Salvaterra <rsalvaterra@gmail.com>
|
||||
Date: Wed, 30 Mar 2022 22:51:55 +0100
|
||||
Subject: [PATCH] kernel: ct: size the hashtable more adequately
|
||||
|
||||
To set the default size of the connection tracking hash table, a divider of
|
||||
16384 becomes inadequate for a router handling lots of connections. Divide by
|
||||
2048 instead, making the default size scale better with the available RAM.
|
||||
|
||||
Signed-off-by: Rui Salvaterra <rsalvaterra@gmail.com>
|
||||
---
|
||||
net/netfilter/nf_conntrack_core.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/net/netfilter/nf_conntrack_core.c
|
||||
+++ b/net/netfilter/nf_conntrack_core.c
|
||||
@@ -2698,7 +2698,7 @@ int nf_conntrack_init_start(void)
|
||||
|
||||
if (!nf_conntrack_htable_size) {
|
||||
nf_conntrack_htable_size
|
||||
- = (((nr_pages << PAGE_SHIFT) / 16384)
|
||||
+ = (((nr_pages << PAGE_SHIFT) / 2048)
|
||||
/ sizeof(struct hlist_head));
|
||||
if (BITS_PER_LONG >= 64 &&
|
||||
nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
|
@ -36,8 +36,9 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
+ Support for FC is very limited.
|
||||
+
|
||||
+config AR8216_PHY
|
||||
+ tristate "Driver for Atheros AR8216 switches"
|
||||
+ tristate "Driver for Atheros AR8216/8327 switches"
|
||||
+ select SWCONFIG
|
||||
+ select ETHERNET_PACKET_MANGLE
|
||||
+
|
||||
+config AR8216_PHY_LEDS
|
||||
+ bool "Atheros AR8216 switch LED support"
|
||||
@ -52,7 +53,6 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
+config PSB6970_PHY
|
||||
+ tristate "Lantiq XWAY Tantos (PSB6970) Ethernet switch"
|
||||
+ select SWCONFIG
|
||||
+ select ETHERNET_PACKET_MANGLE
|
||||
+
|
||||
+config RTL8306_PHY
|
||||
+ tristate "Driver for Realtek RTL8306S switches"
|
||||
@ -95,13 +95,15 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
config AMD_PHY
|
||||
--- a/drivers/net/phy/Makefile
|
||||
+++ b/drivers/net/phy/Makefile
|
||||
@@ -24,6 +24,19 @@ libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_
|
||||
@@ -24,6 +24,21 @@ libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_
|
||||
obj-$(CONFIG_PHYLINK) += phylink.o
|
||||
obj-$(CONFIG_PHYLIB) += libphy.o
|
||||
|
||||
+obj-$(CONFIG_SWCONFIG) += swconfig.o
|
||||
+obj-$(CONFIG_ADM6996_PHY) += adm6996.o
|
||||
+obj-$(CONFIG_AR8216_PHY) += ar8216.o ar8327.o
|
||||
+obj-$(CONFIG_AR8216_PHY) += ar8xxx.o
|
||||
+ar8xxx-y += ar8216.o
|
||||
+ar8xxx-y += ar8327.o
|
||||
+obj-$(CONFIG_SWCONFIG_B53) += b53/
|
||||
+obj-$(CONFIG_IP17XX_PHY) += ip17xx.o
|
||||
+obj-$(CONFIG_PSB6970_PHY) += psb6970.o
|
@ -0,0 +1,21 @@
|
||||
From ebd924d773223593142d417c41d4ee6fa16f1805 Mon Sep 17 00:00:00 2001
|
||||
From: OpenWrt community <openwrt-devel@lists.openwrt.org>
|
||||
Date: Wed, 13 Jul 2022 13:45:56 +0200
|
||||
Subject: [PATCH] net/dsa/mv88e6xxx: disable ATU violation
|
||||
|
||||
---
|
||||
drivers/net/dsa/mv88e6xxx/chip.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
--- a/drivers/net/dsa/mv88e6xxx/chip.c
|
||||
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
|
||||
@@ -3461,6 +3461,9 @@ static int mv88e6xxx_setup_port(struct m
|
||||
else
|
||||
reg = 1 << port;
|
||||
|
||||
+ /* Disable ATU member violation interrupt */
|
||||
+ reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG;
|
||||
+
|
||||
err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_ASSOC_VECTOR,
|
||||
reg);
|
||||
if (err)
|
120
target/linux/generic/hack-6.1/720-net-phy-add-aqr-phys.patch
Normal file
120
target/linux/generic/hack-6.1/720-net-phy-add-aqr-phys.patch
Normal file
@ -0,0 +1,120 @@
|
||||
From: Birger Koblitz <git@birger-koblitz.de>
|
||||
Date: Sun, 5 Sep 2021 15:13:10 +0200
|
||||
Subject: [PATCH] kernel: Add AQR113C and AQR813 support
|
||||
|
||||
This hack adds support for the Aquantia 4th generation, 10GBit
|
||||
PHYs AQR113C and AQR813.
|
||||
|
||||
Signed-off-by: Birger Koblitz <git@birger-koblitz.de>
|
||||
|
||||
--- a/drivers/net/phy/aquantia_main.c
|
||||
+++ b/drivers/net/phy/aquantia_main.c
|
||||
@@ -23,6 +23,7 @@
|
||||
#define PHY_ID_AQCS109 0x03a1b5c2
|
||||
#define PHY_ID_AQR405 0x03a1b4b0
|
||||
#define PHY_ID_AQR113C 0x31c31c12
|
||||
+#define PHY_ID_AQR813 0x31c31cb2
|
||||
|
||||
#define MDIO_PHYXS_VEND_IF_STATUS 0xe812
|
||||
#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK GENMASK(7, 3)
|
||||
@@ -415,6 +416,49 @@ static int aqr107_read_rate(struct phy_d
|
||||
return 0;
|
||||
}
|
||||
|
||||
+/*
+ * read_status callback: refresh link state through aqr_read_status(),
+ * then, for an autonegotiated link that is up, derive phydev->interface
+ * from the vendor interface status register and take the speed from
+ * aqr107_read_rate().
+ *
+ * Returns 0 on success or a negative error code from the register reads.
+ */
+static int aqr113c_read_status(struct phy_device *phydev)
+{
+	int val, ret;
+
+	ret = aqr_read_status(phydev);
+	if (ret)
+		return ret;
+
+	if (!phydev->link || phydev->autoneg == AUTONEG_DISABLE)
+		return 0;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PHYXS, MDIO_PHYXS_VEND_IF_STATUS);
+	if (val < 0)
+		return val;
+
+	switch (FIELD_GET(MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK, val)) {
+	case MDIO_PHYXS_VEND_IF_STATUS_TYPE_KR:
+		phydev->interface = PHY_INTERFACE_MODE_10GKR;
+		break;
+	case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI:
+		phydev->interface = PHY_INTERFACE_MODE_10GBASER;
+		break;
+	case MDIO_PHYXS_VEND_IF_STATUS_TYPE_USXGMII:
+		phydev->interface = PHY_INTERFACE_MODE_USXGMII;
+		break;
+	case MDIO_PHYXS_VEND_IF_STATUS_TYPE_SGMII:
+		phydev->interface = PHY_INTERFACE_MODE_SGMII;
+		break;
+	case MDIO_PHYXS_VEND_IF_STATUS_TYPE_OCSGMII:
+		phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
+		break;
+	default:
+		phydev->interface = PHY_INTERFACE_MODE_NA;
+		break;
+	}
+
+	/*
+	 * On AQR113C, the speed returned by aqr_read_status is wrong, so
+	 * read the (possibly downshifted) rate from the vendor register.
+	 * This is done once, here, so that a failing read is reported to the
+	 * caller instead of being ignored.
+	 */
+	return aqr107_read_rate(phydev);
+}
|
||||
+
|
||||
static int aqr107_read_status(struct phy_device *phydev)
|
||||
{
|
||||
int val, ret;
|
||||
@@ -554,7 +598,7 @@ static void aqr107_chip_info(struct phy_
|
||||
build_id = FIELD_GET(VEND1_GLOBAL_RSVD_STAT1_FW_BUILD_ID, val);
|
||||
prov_id = FIELD_GET(VEND1_GLOBAL_RSVD_STAT1_PROV_ID, val);
|
||||
|
||||
- phydev_dbg(phydev, "FW %u.%u, Build %u, Provisioning %u\n",
|
||||
+ phydev_info(phydev, "FW %u.%u, Build %u, Provisioning %u\n",
|
||||
fw_major, fw_minor, build_id, prov_id);
|
||||
}
|
||||
|
||||
@@ -809,7 +853,7 @@ static struct phy_driver aqr_driver[] =
|
||||
.config_aneg = aqr_config_aneg,
|
||||
.config_intr = aqr_config_intr,
|
||||
.handle_interrupt = aqr_handle_interrupt,
|
||||
- .read_status = aqr107_read_status,
|
||||
+ .read_status = aqr113c_read_status,
|
||||
.get_tunable = aqr107_get_tunable,
|
||||
.set_tunable = aqr107_set_tunable,
|
||||
.suspend = aqr107_suspend,
|
||||
@@ -819,6 +863,24 @@ static struct phy_driver aqr_driver[] =
|
||||
.get_stats = aqr107_get_stats,
|
||||
.link_change_notify = aqr107_link_change_notify,
|
||||
},
|
||||
+{
|
||||
+ PHY_ID_MATCH_MODEL(PHY_ID_AQR813),
|
||||
+ .name = "Aquantia AQR813",
|
||||
+ .probe = aqr107_probe,
|
||||
+ .config_init = aqr107_config_init,
|
||||
+ .config_aneg = aqr_config_aneg,
|
||||
+ .config_intr = aqr_config_intr,
|
||||
+ .handle_interrupt = aqr_handle_interrupt,
|
||||
+ .read_status = aqr113c_read_status,
|
||||
+ .get_tunable = aqr107_get_tunable,
|
||||
+ .set_tunable = aqr107_set_tunable,
|
||||
+ .suspend = aqr107_suspend,
|
||||
+ .resume = aqr107_resume,
|
||||
+ .get_sset_count = aqr107_get_sset_count,
|
||||
+ .get_strings = aqr107_get_strings,
|
||||
+ .get_stats = aqr107_get_stats,
|
||||
+ .link_change_notify = aqr107_link_change_notify,
|
||||
+},
|
||||
};
|
||||
|
||||
module_phy_driver(aqr_driver);
|
||||
@@ -832,6 +894,7 @@ static struct mdio_device_id __maybe_unu
|
||||
{ PHY_ID_MATCH_MODEL(PHY_ID_AQCS109) },
|
||||
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR405) },
|
||||
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR113C) },
|
||||
+ { PHY_ID_MATCH_MODEL(PHY_ID_AQR813) },
|
||||
{ }
|
||||
};
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user