kernel: add linux 6.0 stable release support

This commit is contained in:
DHDAXCW 2022-10-23 15:53:08 +00:00
parent 851aace99b
commit 94dd0508ef
253 changed files with 48501 additions and 34430 deletions

2
include/kernel-6.0 Normal file
View File

@ -0,0 +1,2 @@
LINUX_VERSION-6.0 = .3
LINUX_KERNEL_HASH-6.0.3 = b0d522241805794d8af3a67d331ba063a16496c6fb6d365d48f7ed78ee1c3dcf

View File

@ -109,9 +109,9 @@ define KernelPackage/fs-cifs
+kmod-crypto-ccm \
+kmod-crypto-ecb \
+kmod-crypto-des \
+(LINUX_5_15):kmod-asn1-decoder \
+(LINUX_5_15):kmod-oid-registry \
+(LINUX_5_15):kmod-dnsresolver
+(LINUX_5_15||LINUX_6_0):kmod-asn1-decoder \
+(LINUX_5_15||LINUX_6_0):kmod-oid-registry \
+(LINUX_5_15||LINUX_6_0):kmod-dnsresolver
endef
define KernelPackage/fs-cifs/description
@ -530,7 +530,7 @@ $(eval $(call KernelPackage,fs-ntfs))
define KernelPackage/fs-ntfs3
SUBMENU:=$(FS_MENU)
TITLE:=NTFS3 Read-Write file system support
DEPENDS:=@(LINUX_5_4||LINUX_5_15) +kmod-nls-base
DEPENDS:=@(LINUX_5_4||LINUX_5_15||LINUX_6_0) +kmod-nls-base
KCONFIG:= \
CONFIG_NTFS3_FS \
CONFIG_NTFS3_64BIT_CLUSTER=y \

View File

@ -142,7 +142,7 @@ $(eval $(call KernelPackage,mii))
define KernelPackage/mdio-devres
SUBMENU:=$(NETWORK_DEVICES_MENU)
TITLE:=Supports MDIO device registration
DEPENDS:=@(LINUX_5_10||LINUX_5_15) +kmod-libphy PACKAGE_kmod-of-mdio:kmod-of-mdio
DEPENDS:=@(LINUX_5_10||LINUX_5_15||LINUX_6_0) +kmod-libphy PACKAGE_kmod-of-mdio:kmod-of-mdio
KCONFIG:=CONFIG_MDIO_DEVRES
HIDDEN:=1
FILES:=$(LINUX_DIR)/drivers/net/phy/mdio_devres.ko
@ -563,7 +563,7 @@ $(eval $(call KernelPackage,8139cp))
define KernelPackage/r8169
SUBMENU:=$(NETWORK_DEVICES_MENU)
TITLE:=RealTek RTL-8169 PCI Gigabit Ethernet Adapter kernel support
DEPENDS:=@PCI_SUPPORT +kmod-mii +r8169-firmware +kmod-phy-realtek +(LINUX_5_10||LINUX_5_15):kmod-mdio-devres
DEPENDS:=@PCI_SUPPORT +kmod-mii +r8169-firmware +kmod-phy-realtek +(LINUX_5_10||LINUX_5_15||LINUX_6_0):kmod-mdio-devres
KCONFIG:= \
CONFIG_R8169 \
CONFIG_R8169_NAPI=y \
@ -689,7 +689,7 @@ $(eval $(call KernelPackage,igbvf))
define KernelPackage/ixgbe
SUBMENU:=$(NETWORK_DEVICES_MENU)
TITLE:=Intel(R) 82598/82599 PCI-Express 10 Gigabit Ethernet support
DEPENDS:=@PCI_SUPPORT +kmod-mdio +kmod-ptp +kmod-hwmon-core +kmod-libphy +(LINUX_5_10||LINUX_5_15):kmod-mdio-devres
DEPENDS:=@PCI_SUPPORT +kmod-mdio +kmod-ptp +kmod-hwmon-core +kmod-libphy +(LINUX_5_10||LINUX_5_15||LINUX_6_0):kmod-mdio-devres
KCONFIG:=CONFIG_IXGBE \
CONFIG_IXGBE_VXLAN=n \
CONFIG_IXGBE_HWMON=y \

View File

@ -1151,7 +1151,7 @@ $(eval $(call KernelPackage,keys-trusted))
define KernelPackage/tpm
SUBMENU:=$(OTHER_MENU)
TITLE:=TPM Hardware Support
DEPENDS:= +kmod-random-core +(LINUX_5_15):kmod-asn1-decoder \
DEPENDS:= +kmod-random-core +(LINUX_5_15||LINUX_6_0):kmod-asn1-decoder \
+(LINUX_5_15):kmod-asn1-encoder +(LINUX_5_15):kmod-oid-registry
KCONFIG:= CONFIG_TCG_TPM
FILES:= $(LINUX_DIR)/drivers/char/tpm/tpm.ko
@ -1298,7 +1298,7 @@ $(eval $(call KernelPackage,qcom-qmi-helpers))
define KernelPackage/mhi
SUBMENU:=$(OTHER_MENU)
TITLE:=Modem Host Interface (MHI) bus
DEPENDS:=@LINUX_5_15
DEPENDS:=@(LINUX_5_15||LINUX_6_0)
KCONFIG:=CONFIG_MHI_BUS \
CONFIG_MHI_BUS_DEBUG=y \
CONFIG_MHI_BUS_PCI_GENERIC=n \

View File

@ -1138,7 +1138,7 @@ $(eval $(call KernelPackage,usb-net-aqc111))
define KernelPackage/usb-net-asix
TITLE:=Kernel module for USB-to-Ethernet Asix convertors
DEPENDS:=+kmod-libphy +LINUX_5_15:kmod-mdio-devres
DEPENDS:=+kmod-libphy +(LINUX_5_15||LINUX_6_0):kmod-mdio-devres
KCONFIG:=CONFIG_USB_NET_AX8817X
FILES:= \
$(LINUX_DIR)/drivers/$(USBNET_DIR)/asix.ko \

View File

@ -244,7 +244,7 @@ define KernelPackage/drm
TITLE:=Direct Rendering Manager (DRM) support
HIDDEN:=1
DEPENDS:=+kmod-dma-buf +kmod-i2c-core +kmod-i2c-algo-bit +PACKAGE_kmod-backlight:kmod-backlight \
+(LINUX_5_15):kmod-fb
+(LINUX_5_15||LINUX_6_0):kmod-fb
KCONFIG:= \
CONFIG_DRM \
CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y \
@ -263,6 +263,17 @@ endef
$(eval $(call KernelPackage,drm))
define KernelPackage/drm-buddy
SUBMENU:=$(VIDEO_MENU)
TITLE:=A page based buddy allocator
DEPENDS:=@TARGET_x86 @DISPLAY_SUPPORT +kmod-drm @(LINUX_5_19||LINUX_6_0)
KCONFIG:=CONFIG_DRM_BUDDY
FILES:= $(LINUX_DIR)/drivers/gpu/drm/drm_buddy.ko
AUTOLOAD:=$(call AutoProbe,drm_buddy)
endef
$(eval $(call KernelPackage,drm-buddy))
define KernelPackage/drm-ttm
SUBMENU:=$(VIDEO_MENU)
TITLE:=GPU memory management subsystem
@ -297,11 +308,27 @@ endef
$(eval $(call KernelPackage,drm-kms-helper))
define KernelPackage/drm-display-helper
SUBMENU:=$(VIDEO_MENU)
TITLE:=DRM helpers for display adapters drivers
DEPENDS:=@DISPLAY_SUPPORT +kmod-drm +TARGET_x86:kmod-drm-buddy @(LINUX_6_0)
KCONFIG:=CONFIG_DRM_DISPLAY_HELPER
FILES:=$(LINUX_DIR)/drivers/gpu/drm/display/drm_display_helper.ko
AUTOLOAD:=$(call AutoProbe,drm_display_helper)
endef
define KernelPackage/drm-display-helper/description
DRM helpers for display adapters drivers.
endef
$(eval $(call KernelPackage,drm-display-helper))
define KernelPackage/drm-amdgpu
SUBMENU:=$(VIDEO_MENU)
TITLE:=AMDGPU DRM support
DEPENDS:=@TARGET_x86 @DISPLAY_SUPPORT +kmod-backlight +kmod-drm-ttm \
+kmod-drm-kms-helper +kmod-i2c-algo-bit +amdgpu-firmware
+kmod-drm-kms-helper +kmod-i2c-algo-bit +amdgpu-firmware \
+(LINUX_6_0):kmod-drm-display-helper
KCONFIG:=CONFIG_DRM_AMDGPU \
CONFIG_DRM_AMDGPU_SI=y \
CONFIG_DRM_AMDGPU_CIK=y \
@ -1075,7 +1102,8 @@ $(eval $(call KernelPackage,video-gspca-konica))
define KernelPackage/drm-i915
SUBMENU:=$(VIDEO_MENU)
TITLE:=Intel GPU drm support
DEPENDS:=@TARGET_x86 +kmod-drm-ttm +kmod-drm-kms-helper +i915-firmware
DEPENDS:=@TARGET_x86 +kmod-drm-ttm +kmod-drm-kms-helper +i915-firmware \
+(LINUX_6_0):kmod-drm-display-helper
KCONFIG:= \
CONFIG_INTEL_GTT \
CONFIG_DRM_I915 \

View File

@ -7,8 +7,13 @@ include $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk
PKG_NAME:=r8168
PKG_VERSION:=8.050.02
PKG_RELEASE:=1
PKG_VERSION:=8.050.03
PKG_RELEASE:=$(AUTORELEASE)
PKG_SOURCE_PROTO:=git
PKG_SOURCE_URL:=https://github.com/BROBIRD/openwrt-r8168.git
PKG_SOURCE_VERSION:=ddfaceacd1b7ed2857fb995642a8ffb1fc37e989
PKG_MIRROR_HASH:=5428f60dc33e9503c6cfdf690c00077149dce24cbb0591129d905b9f1aad9202
PKG_BUILD_DIR:=$(KERNEL_BUILD_DIR)/$(PKG_NAME)-$(PKG_VERSION)
@ -18,8 +23,7 @@ define KernelPackage/r8168
TITLE:=Driver for Realtek r8168 chipsets
SUBMENU:=Network Devices
VERSION:=$(LINUX_VERSION)+$(PKG_VERSION)-$(BOARD)-$(PKG_RELEASE)
DEPENDS:=@PCI_SUPPORT
FILES:= $(PKG_BUILD_DIR)/r8168.ko
FILES:= $(PKG_BUILD_DIR)/src/r8168.ko
AUTOLOAD:=$(call AutoProbe,r8168)
CONFLICTS:=kmod-r8169
endef
@ -28,20 +32,8 @@ define Package/r8168/description
This package contains a driver for Realtek r8168 chipsets.
endef
R8168_MAKEOPTS= -C $(PKG_BUILD_DIR) \
PATH="$(TARGET_PATH)" \
ARCH="$(LINUX_KARCH)" \
CROSS_COMPILE="$(TARGET_CROSS)" \
TARGET="$(HAL_TARGET)" \
TOOLPREFIX="$(KERNEL_CROSS)" \
TOOLPATH="$(KERNEL_CROSS)" \
KERNELPATH="$(LINUX_DIR)" \
KERNELDIR="$(LINUX_DIR)" \
LDOPTS=" " \
DOMULTI=1
define Build/Compile
$(MAKE) $(R8168_MAKEOPTS) modules
+$(KERNEL_MAKE) M=$(PKG_BUILD_DIR)/src modules
endef
$(eval $(call KernelPackage,r8168))

View File

@ -1,5 +1,5 @@
--- a/r8168_n.c
+++ b/r8168_n.c
--- a/src/r8168_n.c
+++ b/src/r8168_n.c
@@ -47,6 +47,7 @@
#include <linux/etherdevice.h>
#include <linux/delay.h>
@ -8,7 +8,7 @@
#include <linux/if_vlan.h>
#include <linux/crc32.h>
#include <linux/interrupt.h>
@@ -24607,6 +24608,22 @@ rtl8168_set_bios_setting(struct net_device *dev)
@@ -24643,6 +24644,22 @@ rtl8168_set_bios_setting(struct net_devi
}
}
@ -31,7 +31,7 @@
static void
rtl8168_init_software_variable(struct net_device *dev)
{
@@ -25169,6 +25186,8 @@ rtl8168_init_software_variable(struct net_device *dev)
@@ -25206,6 +25223,8 @@ rtl8168_init_software_variable(struct ne
tp->NotWrMcuPatchCode = TRUE;
}

View File

@ -0,0 +1,47 @@
--- a/src/r8168_n.c
+++ b/src/r8168_n.c
@@ -3715,7 +3715,11 @@
txd->opts2 = 0;
while (1) {
memset(tmpAddr, pattern++, len - 14);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,18,0)
pci_dma_sync_single_for_device(tp->pci_dev,
+#else
+ dma_sync_single_for_device(tp_to_dev(tp),
+#endif
le64_to_cpu(mapping),
len, DMA_TO_DEVICE);
txd->opts1 = cpu_to_le32(DescOwn | FirstFrag | LastFrag | len);
@@ -3743,7 +3747,11 @@
if (rx_len == len) {
dma_sync_single_for_cpu(tp_to_dev(tp), le64_to_cpu(rxd->addr), tp->rx_buf_sz, DMA_FROM_DEVICE);
i = memcmp(skb->data, rx_skb->data, rx_len);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,18,0)
pci_dma_sync_single_for_device(tp->pci_dev, le64_to_cpu(rxd->addr), tp->rx_buf_sz, DMA_FROM_DEVICE);
+#else
+ dma_sync_single_for_device(tp_to_dev(tp), le64_to_cpu(rxd->addr), tp->rx_buf_sz, DMA_FROM_DEVICE);
+#endif
if (i == 0) {
// dev_printk(KERN_INFO, tp_to_dev(tp), "loopback test finished\n",rx_len,len);
break;
@@ -26464,11 +26472,20 @@
if ((sizeof(dma_addr_t) > 4) &&
use_dac &&
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,18,0)
!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) &&
!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
+#else
+ !dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) &&
+ !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) {
+#endif
dev->features |= NETIF_F_HIGHDMA;
} else {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,18,0)
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+#else
+ rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+#endif
if (rc < 0) {
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0)
if (netif_msg_probe(tp))

View File

@ -0,0 +1,18 @@
--- a/src/r8168_n.c
+++ b/src/r8168_n.c
@@ -116,6 +116,15 @@
#define FIRMWARE_8168FP_3 "rtl_nic/rtl8168fp-3.fw"
#define FIRMWARE_8168FP_4 "rtl_nic/rtl8168fp-4.fw"
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0)
+static inline void netif_set_gso_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* dev->gso_max_size is read locklessly from sk_setup_caps() */
+ WRITE_ONCE(dev->gso_max_size, size);
+}
+#endif
+
/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
The RTL chips use a 64 element hash table based on the Ethernet CRC. */
static const int multicast_filter_limit = 32;

View File

@ -1,175 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
################################################################################
#
# r8168 is the Linux device driver released for Realtek Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
################################################################################
# This product is covered by one or more of the following patents:
# US6,570,884, US6,115,776, and US6,327,625.
################################################################################
CONFIG_SOC_LAN = n
ENABLE_FIBER_SUPPORT = n
ENABLE_REALWOW_SUPPORT = n
ENABLE_DASH_SUPPORT = n
ENABLE_DASH_PRINTER_SUPPORT = n
CONFIG_DOWN_SPEED_100 = n
CONFIG_ASPM = y
ENABLE_S5WOL = y
ENABLE_S5_KEEP_CURR_MAC = n
ENABLE_EEE = y
ENABLE_S0_MAGIC_PACKET = n
CONFIG_DYNAMIC_ASPM = y
ENABLE_USE_FIRMWARE_FILE = n
CONFIG_CTAP_SHORT_OFF = n
ifneq ($(KERNELRELEASE),)
obj-m := r8168.o
r8168-objs := r8168_n.o r8168_asf.o rtl_eeprom.o rtltool.o
ifeq ($(CONFIG_SOC_LAN), y)
EXTRA_CFLAGS += -DCONFIG_SOC_LAN
endif
ifeq ($(ENABLE_FIBER_SUPPORT), y)
r8168-objs += r8168_fiber.o
EXTRA_CFLAGS += -DENABLE_FIBER_SUPPORT
endif
ifeq ($(ENABLE_REALWOW_SUPPORT), y)
r8168-objs += r8168_realwow.o
EXTRA_CFLAGS += -DENABLE_REALWOW_SUPPORT
endif
ifeq ($(ENABLE_DASH_SUPPORT), y)
r8168-objs += r8168_dash.o
EXTRA_CFLAGS += -DENABLE_DASH_SUPPORT
endif
ifeq ($(ENABLE_DASH_PRINTER_SUPPORT), y)
r8168-objs += r8168_dash.o
EXTRA_CFLAGS += -DENABLE_DASH_SUPPORT -DENABLE_DASH_PRINTER_SUPPORT
endif
EXTRA_CFLAGS += -DCONFIG_R8168_NAPI
EXTRA_CFLAGS += -DCONFIG_R8168_VLAN
ifeq ($(CONFIG_DOWN_SPEED_100), y)
EXTRA_CFLAGS += -DCONFIG_DOWN_SPEED_100
endif
ifeq ($(CONFIG_ASPM), y)
EXTRA_CFLAGS += -DCONFIG_ASPM
endif
ifeq ($(ENABLE_S5WOL), y)
EXTRA_CFLAGS += -DENABLE_S5WOL
endif
ifeq ($(ENABLE_S5_KEEP_CURR_MAC), y)
EXTRA_CFLAGS += -DENABLE_S5_KEEP_CURR_MAC
endif
ifeq ($(ENABLE_EEE), y)
EXTRA_CFLAGS += -DENABLE_EEE
endif
ifeq ($(ENABLE_S0_MAGIC_PACKET), y)
EXTRA_CFLAGS += -DENABLE_S0_MAGIC_PACKET
endif
ifeq ($(CONFIG_DYNAMIC_ASPM), y)
EXTRA_CFLAGS += -DCONFIG_DYNAMIC_ASPM
endif
ifeq ($(ENABLE_USE_FIRMWARE_FILE), y)
r8168-objs += r8168_firmware.o
EXTRA_CFLAGS += -DENABLE_USE_FIRMWARE_FILE
endif
ifeq ($(CONFIG_CTAP_SHORT_OFF), y)
EXTRA_CFLAGS += -DCONFIG_CTAP_SHORT_OFF
endif
else
BASEDIR := /lib/modules/$(shell uname -r)
KERNELDIR ?= $(BASEDIR)/build
PWD :=$(shell pwd)
DRIVERDIR := $(shell find $(BASEDIR)/kernel/drivers/net/ethernet -name realtek -type d)
ifeq ($(DRIVERDIR),)
DRIVERDIR := $(shell find $(BASEDIR)/kernel/drivers/net -name realtek -type d)
endif
ifeq ($(DRIVERDIR),)
DRIVERDIR := $(BASEDIR)/kernel/drivers/net
endif
RTKDIR := $(subst $(BASEDIR)/,,$(DRIVERDIR))
KERNEL_GCC_VERSION := $(shell cat /proc/version | sed -n 's/.*gcc version \([[:digit:]]\.[[:digit:]]\.[[:digit:]]\).*/\1/p')
CCVERSION = $(shell $(CC) -dumpversion)
KVER = $(shell uname -r)
KMAJ = $(shell echo $(KVER) | \
sed -e 's/^\([0-9][0-9]*\)\.[0-9][0-9]*\.[0-9][0-9]*.*/\1/')
KMIN = $(shell echo $(KVER) | \
sed -e 's/^[0-9][0-9]*\.\([0-9][0-9]*\)\.[0-9][0-9]*.*/\1/')
KREV = $(shell echo $(KVER) | \
sed -e 's/^[0-9][0-9]*\.[0-9][0-9]*\.\([0-9][0-9]*\).*/\1/')
kver_ge = $(shell \
echo test | awk '{if($(KMAJ) < $(1)) {print 0} else { \
if($(KMAJ) > $(1)) {print 1} else { \
if($(KMIN) < $(2)) {print 0} else { \
if($(KMIN) > $(2)) {print 1} else { \
if($(KREV) < $(3)) {print 0} else { print 1 } \
}}}}}' \
)
.PHONY: all
all: print_vars clean modules install
print_vars:
@echo
@echo "CC: " $(CC)
@echo "CCVERSION: " $(CCVERSION)
@echo "KERNEL_GCC_VERSION: " $(KERNEL_GCC_VERSION)
@echo "KVER: " $(KVER)
@echo "KMAJ: " $(KMAJ)
@echo "KMIN: " $(KMIN)
@echo "KREV: " $(KREV)
@echo "BASEDIR: " $(BASEDIR)
@echo "DRIVERDIR: " $(DRIVERDIR)
@echo "PWD: " $(PWD)
@echo "RTKDIR: " $(RTKDIR)
@echo
.PHONY:modules
modules:
#ifeq ($(call kver_ge,5,0,0),1)
$(MAKE) -C $(KERNELDIR) M=$(PWD) modules
#else
# $(MAKE) -C $(KERNELDIR) SUBDIRS=$(PWD) modules
#endif
.PHONY:clean
clean:
#ifeq ($(call kver_ge,5,0,0),1)
$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
#else
# $(MAKE) -C $(KERNELDIR) SUBDIRS=$(PWD) clean
#endif
.PHONY:install
install:
#ifeq ($(call kver_ge,5,0,0),1)
$(MAKE) -C $(KERNELDIR) M=$(PWD) INSTALL_MOD_DIR=$(RTKDIR) modules_install
#else
# $(MAKE) -C $(KERNELDIR) SUBDIRS=$(PWD) INSTALL_MOD_DIR=$(RTKDIR) modules_install
#endif
endif

View File

@ -1,75 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
################################################################################
#
# r8168 is the Linux device driver released for Realtek Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
################################################################################
# This product is covered by one or more of the following patents:
# US6,570,884, US6,115,776, and US6,327,625.
################################################################################
CC := gcc
LD := ld
ARCH := $(shell uname -m | sed 's/i.86/i386/')
KSRC := /lib/modules/$(shell uname -r)/build
CONFIG_FILE := $(KSRC)/include/linux/autoconf.h
KMISC := /lib/modules/$(shell uname -r)/kernel/drivers/net/
ifeq ($(ARCH),x86_64)
MODCFLAGS += -mcmodel=kernel -mno-red-zone
endif
#standard flags for module builds
MODCFLAGS += -DLINUX -D__KERNEL__ -DMODULE -O2 -pipe -Wall
MODCFLAGS += -I$(KSRC)/include -I.
MODCFLAGS += -DMODVERSIONS -DEXPORT_SYMTAB -include $(KSRC)/include/linux/modversions.h
SOURCE := r8168_n.c r8168_asf.c rtl_eeprom.c rtltool.c
OBJS := $(SOURCE:.c=.o)
SMP := $(shell $(CC) $(MODCFLAGS) -E -dM $(CONFIG_FILE) | \
grep CONFIG_SMP | awk '{print $$3}')
ifneq ($(SMP),1)
SMP := 0
endif
ifeq ($(SMP),1)
MODCFLAGS += -D__SMP__
endif
modules: $(OBJS)
$(LD) -r $^ -o r8168.o
strip --strip-debug r8168.o
%.o: %.c
$(CC) $(MODCFLAGS) -c $< -o $@
clean:
rm *.o -f
install:
install -m 744 -c r8168.o $(KMISC)

File diff suppressed because it is too large Load Diff

View File

@ -1,422 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
################################################################################
#
# r8168 is the Linux device driver released for Realtek Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
*/
/************************************************************************************
* This product is covered by one or more of the following patents:
* US6,570,884, US6,115,776, and US6,327,625.
***********************************************************************************/
#include <linux/module.h>
#include <linux/version.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/delay.h>
#include <linux/ethtool.h>
#include <linux/mii.h>
#include <linux/if_vlan.h>
#include <linux/crc32.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/rtnetlink.h>
#include <asm/uaccess.h>
#include "r8168.h"
#include "r8168_asf.h"
#include "rtl_eeprom.h"
int rtl8168_asf_ioctl(struct net_device *dev,
struct ifreq *ifr)
{
struct rtl8168_private *tp = netdev_priv(dev);
void *user_data = ifr->ifr_data;
struct asf_ioctl_struct asf_usrdata;
unsigned long flags;
if (tp->mcfg != CFG_METHOD_7 && tp->mcfg != CFG_METHOD_8)
return -EOPNOTSUPP;
if (copy_from_user(&asf_usrdata, user_data, sizeof(struct asf_ioctl_struct)))
return -EFAULT;
spin_lock_irqsave(&tp->lock, flags);
switch (asf_usrdata.offset) {
case HBPeriod:
rtl8168_asf_hbperiod(tp, asf_usrdata.arg, asf_usrdata.u.data);
break;
case WD8Timer:
break;
case WD16Rst:
rtl8168_asf_wd16rst(tp, asf_usrdata.arg, asf_usrdata.u.data);
break;
case WD8Rst:
rtl8168_asf_time_period(tp, asf_usrdata.arg, WD8Rst, asf_usrdata.u.data);
break;
case LSnsrPollCycle:
rtl8168_asf_time_period(tp, asf_usrdata.arg, LSnsrPollCycle, asf_usrdata.u.data);
break;
case ASFSnsrPollPrd:
rtl8168_asf_time_period(tp, asf_usrdata.arg, ASFSnsrPollPrd, asf_usrdata.u.data);
break;
case AlertReSendItvl:
rtl8168_asf_time_period(tp, asf_usrdata.arg, AlertReSendItvl, asf_usrdata.u.data);
break;
case SMBAddr:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, SMBAddr, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case ASFConfigR0:
rtl8168_asf_config_regs(tp, asf_usrdata.arg, ASFConfigR0, asf_usrdata.u.data);
break;
case ASFConfigR1:
rtl8168_asf_config_regs(tp, asf_usrdata.arg, ASFConfigR1, asf_usrdata.u.data);
break;
case ConsoleMA:
rtl8168_asf_console_mac(tp, asf_usrdata.arg, asf_usrdata.u.data);
break;
case ConsoleIP:
rtl8168_asf_ip_address(tp, asf_usrdata.arg, ConsoleIP, asf_usrdata.u.data);
break;
case IPAddr:
rtl8168_asf_ip_address(tp, asf_usrdata.arg, IPAddr, asf_usrdata.u.data);
break;
case UUID:
rtl8168_asf_rw_uuid(tp, asf_usrdata.arg, asf_usrdata.u.data);
break;
case IANA:
rtl8168_asf_rw_iana(tp, asf_usrdata.arg, asf_usrdata.u.data);
break;
case SysID:
rtl8168_asf_rw_systemid(tp, asf_usrdata.arg, asf_usrdata.u.data);
break;
case Community:
rtl8168_asf_community_string(tp, asf_usrdata.arg, asf_usrdata.u.string);
break;
case StringLength:
rtl8168_asf_community_string_len(tp, asf_usrdata.arg, asf_usrdata.u.data);
break;
case FmCapMsk:
rtl8168_asf_capability_masks(tp, asf_usrdata.arg, FmCapMsk, asf_usrdata.u.data);
break;
case SpCMDMsk:
rtl8168_asf_capability_masks(tp, asf_usrdata.arg, SpCMDMsk, asf_usrdata.u.data);
break;
case SysCapMsk:
rtl8168_asf_capability_masks(tp, asf_usrdata.arg, SysCapMsk, asf_usrdata.u.data);
break;
case RmtRstAddr:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, RmtRstAddr, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case RmtRstCmd:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, RmtRstCmd, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case RmtRstData:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, RmtRstData, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case RmtPwrOffAddr:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, RmtPwrOffAddr, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case RmtPwrOffCmd:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, RmtPwrOffCmd, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case RmtPwrOffData:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, RmtPwrOffData, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case RmtPwrOnAddr:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, RmtPwrOnAddr, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case RmtPwrOnCmd:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, RmtPwrOnCmd, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case RmtPwrOnData:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, RmtPwrOnData, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case RmtPCRAddr:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, RmtPCRAddr, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case RmtPCRCmd:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, RmtPCRCmd, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case RmtPCRData:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, RmtPCRData, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case ASFSnsr0Addr:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, ASFSnsr0Addr, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case LSnsrAddr0:
rtl8168_asf_rw_hexadecimal(tp, asf_usrdata.arg, LSnsrAddr0, RW_ONE_BYTE, asf_usrdata.u.data);
break;
case KO:
/* Get/Set Key Operation */
rtl8168_asf_key_access(tp, asf_usrdata.arg, KO, asf_usrdata.u.data);
break;
case KA:
/* Get/Set Key Administrator */
rtl8168_asf_key_access(tp, asf_usrdata.arg, KA, asf_usrdata.u.data);
break;
case KG:
/* Get/Set Key Generation */
rtl8168_asf_key_access(tp, asf_usrdata.arg, KG, asf_usrdata.u.data);
break;
case KR:
/* Get/Set Key Random */
rtl8168_asf_key_access(tp, asf_usrdata.arg, KR, asf_usrdata.u.data);
break;
default:
spin_unlock_irqrestore(&tp->lock, flags);
return -EOPNOTSUPP;
}
spin_unlock_irqrestore(&tp->lock, flags);
if (copy_to_user(user_data, &asf_usrdata, sizeof(struct asf_ioctl_struct)))
return -EFAULT;
return 0;
}
void rtl8168_asf_hbperiod(struct rtl8168_private *tp, int arg, unsigned int *data)
{
if (arg == ASF_GET)
data[ASFHBPERIOD] = rtl8168_eri_read(tp, HBPeriod, RW_TWO_BYTES, ERIAR_ASF);
else if (arg == ASF_SET) {
rtl8168_eri_write(tp, HBPeriod, RW_TWO_BYTES, data[ASFHBPERIOD], ERIAR_ASF);
rtl8168_eri_write(tp, 0x1EC, RW_ONE_BYTE, 0x07, ERIAR_ASF);
}
}
void rtl8168_asf_wd16rst(struct rtl8168_private *tp, int arg, unsigned int *data)
{
data[ASFWD16RST] = rtl8168_eri_read(tp, WD16Rst, RW_TWO_BYTES, ERIAR_ASF);
}
void rtl8168_asf_console_mac(struct rtl8168_private *tp, int arg, unsigned int *data)
{
int i;
if (arg == ASF_GET) {
for (i = 0; i < 6; i++)
data[i] = rtl8168_eri_read(tp, ConsoleMA + i, RW_ONE_BYTE, ERIAR_ASF);
} else if (arg == ASF_SET) {
for (i = 0; i < 6; i++)
rtl8168_eri_write(tp, ConsoleMA + i, RW_ONE_BYTE, data[i], ERIAR_ASF);
/* write the new console MAC address to EEPROM */
rtl8168_eeprom_write_sc(tp, 70, (data[1] << 8) | data[0]);
rtl8168_eeprom_write_sc(tp, 71, (data[3] << 8) | data[2]);
rtl8168_eeprom_write_sc(tp, 72, (data[5] << 8) | data[4]);
}
}
void rtl8168_asf_ip_address(struct rtl8168_private *tp, int arg, int offset, unsigned int *data)
{
int i;
int eeprom_off = 0;
if (arg == ASF_GET) {
for (i = 0; i < 4; i++)
data[i] = rtl8168_eri_read(tp, offset + i, RW_ONE_BYTE, ERIAR_ASF);
} else if (arg == ASF_SET) {
for (i = 0; i < 4; i++)
rtl8168_eri_write(tp, offset + i, RW_ONE_BYTE, data[i], ERIAR_ASF);
if (offset == ConsoleIP)
eeprom_off = 73;
else if (offset == IPAddr)
eeprom_off = 75;
/* write the new IP address to EEPROM */
rtl8168_eeprom_write_sc(tp, eeprom_off, (data[1] << 8) | data[0]);
rtl8168_eeprom_write_sc(tp, eeprom_off + 1, (data[3] << 8) | data[2]);
}
}
void rtl8168_asf_config_regs(struct rtl8168_private *tp, int arg, int offset, unsigned int *data)
{
unsigned int value;
if (arg == ASF_GET) {
data[ASFCAPABILITY] = (rtl8168_eri_read(tp, offset, RW_ONE_BYTE, ERIAR_ASF) & data[ASFCONFIG]) ? FUNCTION_ENABLE : FUNCTION_DISABLE;
} else if (arg == ASF_SET) {
value = rtl8168_eri_read(tp, offset, RW_ONE_BYTE, ERIAR_ASF);
if (data[ASFCAPABILITY] == FUNCTION_ENABLE)
value |= data[ASFCONFIG];
else if (data[ASFCAPABILITY] == FUNCTION_DISABLE)
value &= ~data[ASFCONFIG];
rtl8168_eri_write(tp, offset, RW_ONE_BYTE, value, ERIAR_ASF);
}
}
void rtl8168_asf_capability_masks(struct rtl8168_private *tp, int arg, int offset, unsigned int *data)
{
unsigned int len, bit_mask;
bit_mask = DISABLE_MASK;
if (offset == FmCapMsk) {
/* System firmware capabilities */
len = RW_FOUR_BYTES;
if (data[ASFCAPMASK] == FUNCTION_ENABLE)
bit_mask = FMW_CAP_MASK;
} else if (offset == SpCMDMsk) {
/* Special commands */
len = RW_TWO_BYTES;
if (data[ASFCAPMASK] == FUNCTION_ENABLE)
bit_mask = SPC_CMD_MASK;
} else {
/* System capability (offset == SysCapMsk)*/
len = RW_ONE_BYTE;
if (data[ASFCAPMASK] == FUNCTION_ENABLE)
bit_mask = SYS_CAP_MASK;
}
if (arg == ASF_GET)
data[ASFCAPMASK] = rtl8168_eri_read(tp, offset, len, ERIAR_ASF) ? FUNCTION_ENABLE : FUNCTION_DISABLE;
else /* arg == ASF_SET */
rtl8168_eri_write(tp, offset, len, bit_mask, ERIAR_ASF);
}
void rtl8168_asf_community_string(struct rtl8168_private *tp, int arg, char *string)
{
int i;
if (arg == ASF_GET) {
for (i = 0; i < COMMU_STR_MAX_LEN; i++)
string[i] = rtl8168_eri_read(tp, Community + i, RW_ONE_BYTE, ERIAR_ASF);
} else { /* arg == ASF_SET */
for (i = 0; i < COMMU_STR_MAX_LEN; i++)
rtl8168_eri_write(tp, Community + i, RW_ONE_BYTE, string[i], ERIAR_ASF);
}
}
void rtl8168_asf_community_string_len(struct rtl8168_private *tp, int arg, unsigned int *data)
{
if (arg == ASF_GET)
data[ASFCOMMULEN] = rtl8168_eri_read(tp, StringLength, RW_ONE_BYTE, ERIAR_ASF);
else /* arg == ASF_SET */
rtl8168_eri_write(tp, StringLength, RW_ONE_BYTE, data[ASFCOMMULEN], ERIAR_ASF);
}
void rtl8168_asf_time_period(struct rtl8168_private *tp, int arg, int offset, unsigned int *data)
{
int pos = 0;
if (offset == WD8Rst)
pos = ASFWD8RESET;
else if (offset == LSnsrPollCycle)
pos = ASFLSNRPOLLCYC;
else if (offset == ASFSnsrPollPrd)
pos = ASFSNRPOLLCYC;
else if (offset == AlertReSendItvl)
pos = ASFALERTRESND;
if (arg == ASF_GET)
data[pos] = rtl8168_eri_read(tp, offset, RW_ONE_BYTE, ERIAR_ASF);
else /* arg == ASF_SET */
rtl8168_eri_write(tp, offset, RW_ONE_BYTE, data[pos], ERIAR_ASF);
}
void rtl8168_asf_key_access(struct rtl8168_private *tp, int arg, int offset, unsigned int *data)
{
int i, j;
int key_off = 0;
if (arg == ASF_GET) {
for (i = 0; i < KEY_LEN; i++)
data[i] = rtl8168_eri_read(tp, offset + KEY_LEN - (i + 1), RW_ONE_BYTE, ERIAR_ASF);
} else {
if (offset == KO)
key_off = 162;
else if (offset == KA)
key_off = 172;
else if (offset == KG)
key_off = 182;
else if (offset == KR)
key_off = 192;
/* arg == ASF_SET */
for (i = 0; i < KEY_LEN; i++)
rtl8168_eri_write(tp, offset + KEY_LEN - (i + 1), RW_ONE_BYTE, data[i], ERIAR_ASF);
/* write the new key to EEPROM */
for (i = 0, j = 19; i < 10; i++, j = j - 2)
rtl8168_eeprom_write_sc(tp, key_off + i, (data[j - 1] << 8) | data[j]);
}
}
void rtl8168_asf_rw_hexadecimal(struct rtl8168_private *tp, int arg, int offset, int len, unsigned int *data)
{
if (arg == ASF_GET)
data[ASFRWHEXNUM] = rtl8168_eri_read(tp, offset, len, ERIAR_ASF);
else /* arg == ASF_SET */
rtl8168_eri_write(tp, offset, len, data[ASFRWHEXNUM], ERIAR_ASF);
}
void rtl8168_asf_rw_systemid(struct rtl8168_private *tp, int arg, unsigned int *data)
{
int i;
if (arg == ASF_GET)
for (i = 0; i < SYSID_LEN ; i++)
data[i] = rtl8168_eri_read(tp, SysID + i, RW_ONE_BYTE, ERIAR_ASF);
else /* arg == ASF_SET */
for (i = 0; i < SYSID_LEN ; i++)
rtl8168_eri_write(tp, SysID + i, RW_ONE_BYTE, data[i], ERIAR_ASF);
}
void rtl8168_asf_rw_iana(struct rtl8168_private *tp, int arg, unsigned int *data)
{
int i;
if (arg == ASF_GET)
for (i = 0; i < RW_FOUR_BYTES; i++)
data[i] = rtl8168_eri_read(tp, IANA + i, RW_ONE_BYTE, ERIAR_ASF);
else /* arg == ASF_SET */
for (i = 0; i < RW_FOUR_BYTES; i++)
rtl8168_eri_write(tp, IANA + i, RW_ONE_BYTE, data[i], ERIAR_ASF);
}
void rtl8168_asf_rw_uuid(struct rtl8168_private *tp, int arg, unsigned int *data)
{
int i, j;
if (arg == ASF_GET)
for (i = UUID_LEN - 1, j = 0; i >= 0 ; i--, j++)
data[j] = rtl8168_eri_read(tp, UUID + i, RW_ONE_BYTE, ERIAR_ASF);
else /* arg == ASF_SET */
for (i = UUID_LEN - 1, j = 0; i >= 0 ; i--, j++)
rtl8168_eri_write(tp, UUID + i, RW_ONE_BYTE, data[j], ERIAR_ASF);
}

View File

@ -1,295 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
################################################################################
#
# r8168 is the Linux device driver released for Realtek Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
*/
/************************************************************************************
* This product is covered by one or more of the following patents:
* US6,570,884, US6,115,776, and US6,327,625.
***********************************************************************************/
#define SIOCDEVPRIVATE_RTLASF SIOCDEVPRIVATE
#define FUNCTION_ENABLE 1
#define FUNCTION_DISABLE 0
#define ASFCONFIG 0
#define ASFCAPABILITY 1
#define ASFCOMMULEN 0
#define ASFHBPERIOD 0
#define ASFWD16RST 0
#define ASFCAPMASK 0
#define ASFALERTRESND 0
#define ASFLSNRPOLLCYC 0
#define ASFSNRPOLLCYC 0
#define ASFWD8RESET 0
#define ASFRWHEXNUM 0
#define FMW_CAP_MASK 0x0000F867
#define SPC_CMD_MASK 0x1F00
#define SYS_CAP_MASK 0xFF
#define DISABLE_MASK 0x00
#define MAX_DATA_LEN 200
#define MAX_STR_LEN 200
#define COMMU_STR_MAX_LEN 23
#define KEY_LEN 20
#define UUID_LEN 16
#define SYSID_LEN 2
#define RW_ONE_BYTE 1
#define RW_TWO_BYTES 2
#define RW_FOUR_BYTES 4
enum asf_registers {
HBPeriod = 0x0000,
WD8Rst = 0x0002,
WD8Timer = 0x0003,
WD16Rst = 0x0004,
LSnsrPollCycle = 0x0006,
ASFSnsrPollPrd = 0x0007,
AlertReSendCnt = 0x0008,
AlertReSendItvl = 0x0009,
SMBAddr = 0x000A,
SMBCap = 0x000B,
ASFConfigR0 = 0x000C,
ASFConfigR1 = 0x000D,
WD16Timer = 0x000E,
ConsoleMA = 0x0010,
ConsoleIP = 0x0016,
IPAddr = 0x001A,
UUID = 0x0020,
IANA = 0x0030,
SysID = 0x0034,
Community = 0x0036,
StringLength = 0x004D,
LC = 0x004E,
EntityInst = 0x004F,
FmCapMsk = 0x0050,
SpCMDMsk = 0x0054,
SysCapMsk = 0x0056,
WDSysSt = 0x0057,
RxMsgType = 0x0058,
RxSpCMD = 0x0059,
RxSpCMDPa = 0x005A,
RxBtOpMsk = 0x005C,
RmtRstAddr = 0x005E,
RmtRstCmd = 0x005F,
RmtRstData = 0x0060,
RmtPwrOffAddr = 0x0061,
RmtPwrOffCmd = 0x0062,
RmtPwrOffData = 0x0063,
RmtPwrOnAddr = 0x0064,
RmtPwrOnCmd = 0x0065,
RmtPwrOnData = 0x0066,
RmtPCRAddr = 0x0067,
RmtPCRCmd = 0x0068,
RmtPCRData = 0x0069,
RMCP_IANA = 0x006A,
RMCP_OEM = 0x006E,
ASFSnsr0Addr = 0x0070,
ASFSnsrEvSt = 0x0073,
ASFSnsrEvAlert = 0x0081,
LSnsrNo = 0x00AD,
AssrtEvntMsk = 0x00AE,
DeAssrtEvntMsk = 0x00AF,
LSnsrAddr0 = 0x00B0,
LAlertCMD0 = 0x00B1,
LAlertDataMsk0 = 0x00B2,
LAlertCmp0 = 0x00B3,
LAlertESnsrT0 = 0x00B4,
LAlertET0 = 0x00B5,
LAlertEOffset0 = 0x00B6,
LAlertES0 = 0x00B7,
LAlertSN0 = 0x00B8,
LAlertEntity0 = 0x00B9,
LAlertEI0 = 0x00BA,
LSnsrState0 = 0x00BB,
LSnsrAddr1 = 0x00BD,
LAlertCMD1 = 0x00BE,
LAlertDataMsk1 = 0x00BF,
LAlertCmp1 = 0x00C0,
LAlertESnsrT1 = 0x00C1,
LAlertET1 = 0x00C2,
LAlertEOffset1 = 0x00C3,
LAlertES1 = 0x00C4,
LAlertSN1 = 0x00C5,
LAlertEntity1 = 0x00C6,
LAlertEI1 = 0x00C7,
LSnsrState1 = 0x00C8,
LSnsrAddr2 = 0x00CA,
LAlertCMD2 = 0x00CB,
LAlertDataMsk2 = 0x00CC,
LAlertCmp2 = 0x00CD,
LAlertESnsrT2 = 0x00CE,
LAlertET2 = 0x00CF,
LAlertEOffset2 = 0x00D0,
LAlertES2 = 0x00D1,
LAlertSN2 = 0x00D2,
LAlertEntity2 = 0x00D3,
LAlertEI2 = 0x00D4,
LSnsrState2 = 0x00D5,
LSnsrAddr3 = 0x00D7,
LAlertCMD3 = 0x00D8,
LAlertDataMsk3 = 0x00D9,
LAlertCmp3 = 0x00DA,
LAlertESnsrT3 = 0x00DB,
LAlertET3 = 0x00DC,
LAlertEOffset3 = 0x00DD,
LAlertES3 = 0x00DE,
LAlertSN3 = 0x00DF,
LAlertEntity3 = 0x00E0,
LAlertEI3 = 0x00E1,
LSnsrState3 = 0x00E2,
LSnsrAddr4 = 0x00E4,
LAlertCMD4 = 0x00E5,
LAlertDataMsk4 = 0x00E6,
LAlertCmp4 = 0x00E7,
LAlertESnsrT4 = 0x00E8,
LAlertET4 = 0x00E9,
LAlertEOffset4 = 0x00EA,
LAlertES4 = 0x00EB,
LAlertSN4 = 0x00EC,
LAlertEntity4 = 0x00ED,
LAlertEI4 = 0x00EE,
LSnsrState4 = 0x00EF,
LSnsrAddr5 = 0x00F1,
LAlertCMD5 = 0x00F2,
LAlertDataMsk5 = 0x00F3,
LAlertCmp5 = 0x00F4,
LAlertESnsrT5 = 0x00F5,
LAlertET5 = 0x00F6,
LAlertEOffset5 = 0x00F7,
LAlertES5 = 0x00F8,
LAlertSN5 = 0x00F9,
LAlertEntity5 = 0x00FA,
LAlertEI5 = 0x00FB,
LSnsrState5 = 0x00FC,
LSnsrAddr6 = 0x00FE,
LAlertCMD6 = 0x00FF,
LAlertDataMsk6 = 0x0100,
LAlertCmp6 = 0x0101,
LAlertESnsrT6 = 0x0102,
LAlertET6 = 0x0103,
LAlertEOffset6 = 0x0104,
LAlertES6 = 0x0105,
LAlertSN6 = 0x0106,
LAlertEntity6 = 0x0107,
LAlertEI6 = 0x0108,
LSnsrState6 = 0x0109,
LSnsrAddr7 = 0x010B,
LAlertCMD7 = 0x010C,
LAlertDataMsk7 = 0x010D,
LAlertCmp7 = 0x010E,
LAlertESnsrT7 = 0x010F,
LAlertET7 = 0x0110,
LAlertEOffset7 = 0x0111,
LAlertES7 = 0x0112,
LAlertSN7 = 0x0113,
LAlertEntity7 = 0x0114,
LAlertEI7 = 0x0115,
LSnsrState7 = 0x0116,
LAssert = 0x0117,
LDAssert = 0x0118,
IPServiceType = 0x0119,
IPIdfr = 0x011A,
FlagFOffset = 0x011C,
TTL = 0x011E,
HbtEI = 0x011F,
MgtConSID1 = 0x0120,
MgtConSID2 = 0x0124,
MgdCltSID = 0x0128,
StCd = 0x012C,
MgtConUR = 0x012D,
MgtConUNL = 0x012E,
AuthPd = 0x0130,
IntyPd = 0x0138,
MgtConRN = 0x0140,
MgdCtlRN = 0x0150,
MgtConUN = 0x0160,
Rakp2IntCk = 0x0170,
KO = 0x017C,
KA = 0x0190,
KG = 0x01A4,
KR = 0x01B8,
CP = 0x01CC,
CQ = 0x01D0,
KC = 0x01D4,
ConsoleSid = 0x01E8,
SIK1 = 0x01FC,
SIK2 = 0x0210,
Udpsrc_port = 0x0224,
Udpdes_port = 0x0226,
Asf_debug_mux = 0x0228
};
enum asf_cmdln_opt {
ASF_GET,
ASF_SET,
ASF_HELP
};
struct asf_ioctl_struct {
unsigned int arg;
unsigned int offset;
union {
unsigned int data[MAX_DATA_LEN];
char string[MAX_STR_LEN];
} u;
};
int rtl8168_asf_ioctl(struct net_device *dev, struct ifreq *ifr);
void rtl8168_asf_hbperiod(struct rtl8168_private *tp, int arg, unsigned int *data);
void rtl8168_asf_wd16rst(struct rtl8168_private *tp, int arg, unsigned int *data);
void rtl8168_asf_console_mac(struct rtl8168_private *, int arg, unsigned int *data);
void rtl8168_asf_ip_address(struct rtl8168_private *, int arg, int offset, unsigned int *data);
void rtl8168_asf_config_regs(struct rtl8168_private *tp, int arg, int offset, unsigned int *data);
void rtl8168_asf_capability_masks(struct rtl8168_private *tp, int arg, int offset, unsigned int *data);
void rtl8168_asf_community_string(struct rtl8168_private *tp, int arg, char *string);
void rtl8168_asf_community_string_len(struct rtl8168_private *tp, int arg, unsigned int *data);
void rtl8168_asf_alert_resend_interval(struct rtl8168_private *tp, int arg, unsigned int *data);
void rtl8168_asf_time_period(struct rtl8168_private *tp, int arg, int offset, unsigned int *data);
void rtl8168_asf_key_access(struct rtl8168_private *, int arg, int offset, unsigned int *data);
void rtl8168_asf_rw_hexadecimal(struct rtl8168_private *tp, int arg, int offset, int len, unsigned int *data);
void rtl8168_asf_rw_iana(struct rtl8168_private *tp, int arg, unsigned int *data);
void rtl8168_asf_rw_uuid(struct rtl8168_private *tp, int arg, unsigned int *data);
void rtl8168_asf_rw_systemid(struct rtl8168_private *tp, int arg, unsigned int *data);

View File

@ -1,256 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
################################################################################
#
# r8168 is the Linux device driver released for Realtek Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
*/
/************************************************************************************
* This product is covered by one or more of the following patents:
* US6,570,884, US6,115,776, and US6,327,625.
***********************************************************************************/
#ifndef _LINUX_R8168_DASH_H
#define _LINUX_R8168_DASH_H
#define SIOCDEVPRIVATE_RTLDASH SIOCDEVPRIVATE+2
enum rtl_dash_cmd {
RTL_DASH_ARP_NS_OFFLOAD = 0,
RTL_DASH_SET_OOB_IPMAC,
RTL_DASH_NOTIFY_OOB,
RTL_DASH_SEND_BUFFER_DATA_TO_DASH_FW,
RTL_DASH_CHECK_SEND_BUFFER_TO_DASH_FW_COMPLETE,
RTL_DASH_GET_RCV_FROM_FW_BUFFER_DATA,
RTL_DASH_OOB_REQ,
RTL_DASH_OOB_ACK,
RTL_DASH_DETACH_OOB_REQ,
RTL_DASH_DETACH_OOB_ACK,
RTL_FW_SET_IPV4 = 0x10,
RTL_FW_GET_IPV4,
RTL_FW_SET_IPV6,
RTL_FW_GET_IPV6,
RTL_FW_SET_EXT_SNMP,
RTL_FW_GET_EXT_SNMP,
RTL_FW_SET_WAKEUP_PATTERN,
RTL_FW_GET_WAKEUP_PATTERN,
RTL_FW_DEL_WAKEUP_PATTERN,
RTLT_DASH_COMMAND_INVALID,
};
struct rtl_dash_ip_mac {
struct sockaddr ifru_addr;
struct sockaddr ifru_netmask;
struct sockaddr ifru_hwaddr;
};
struct rtl_dash_ioctl_struct {
__u32 cmd;
__u32 offset;
__u32 len;
union {
__u32 data;
void *data_buffer;
};
};
struct settings_ipv4 {
__u32 IPv4addr;
__u32 IPv4mask;
__u32 IPv4Gateway;
};
struct settings_ipv6 {
__u32 reserved;
__u32 prefixLen;
__u16 IPv6addr[8];
__u16 IPv6Gateway[8];
};
struct settings_ext_snmp {
__u16 index;
__u16 oid_get_len;
__u8 oid_for_get[24];
__u8 reserved0[26];
__u16 value_len;
__u8 value[256];
__u8 supported;
__u8 reserved1[27];
};
struct wakeup_pattern {
__u8 index;
__u8 valid;
__u8 start;
__u8 length;
__u8 name[36];
__u8 mask[16];
__u8 pattern[128];
__u32 reserved[2];
};
typedef struct _RX_DASH_FROM_FW_DESC {
__le16 length;
__le16 status;
__le32 resv;
__le64 BufferAddress;
}
RX_DASH_FROM_FW_DESC, *PRX_DASH_FROM_FW_DESC;
typedef struct _TX_DASH_SEND_FW_DESC {
__le16 length;
__le16 status;
__le32 resv;
__le64 BufferAddress;
}
TX_DASH_SEND_FW_DESC, *PTX_DASH_SEND_FW_DESC;
typedef struct _OSOOBHdr {
__le32 len;
u8 type;
u8 flag;
u8 hostReqV;
u8 res;
}
OSOOBHdr, *POSOOBHdr;
typedef struct _RX_DASH_BUFFER_TYPE_2 {
OSOOBHdr oobhdr;
u8 RxDataBuffer[0];
}
RX_DASH_BUFFER_TYPE_2, *PRX_DASH_BUFFER_TYPE_2;
#define ALIGN_8 (0x7)
#define ALIGN_16 (0xf)
#define ALIGN_32 (0x1f)
#define ALIGN_64 (0x3f)
#define ALIGN_256 (0xff)
#define ALIGN_4096 (0xfff)
#define OCP_REG_CONFIG0 (0x10)
#define OCP_REG_CONFIG0_REV_F (0xB8)
#define OCP_REG_DASH_POLL (0x30)
#define OCP_REG_HOST_REQ (0x34)
#define OCP_REG_DASH_REQ (0x35)
#define OCP_REG_CR (0x36)
#define OCP_REG_DMEMSTA (0x38)
#define OCP_REG_GPHYAR (0x60)
#define OCP_REG_CONFIG0_DASHEN BIT_15
#define OCP_REG_CONFIG0_OOBRESET BIT_14
#define OCP_REG_CONFIG0_APRDY BIT_13
#define OCP_REG_CONFIG0_FIRMWARERDY BIT_12
#define OCP_REG_CONFIG0_DRIVERRDY BIT_11
#define OCP_REG_CONFIG0_OOB_WDT BIT_9
#define OCP_REG_CONFIG0_DRV_WAIT_OOB BIT_8
#define OCP_REG_CONFIG0_TLSEN BIT_7
#define HW_DASH_SUPPORT_DASH(_M) ((_M)->HwSuppDashVer > 0)
#define HW_DASH_SUPPORT_TYPE_1(_M) ((_M)->HwSuppDashVer == 1)
#define HW_DASH_SUPPORT_TYPE_2(_M) ((_M)->HwSuppDashVer == 2)
#define HW_DASH_SUPPORT_TYPE_3(_M) ((_M)->HwSuppDashVer == 3)
#define RECV_FROM_FW_BUF_SIZE (2048)
#define SEND_TO_FW_BUF_SIZE (2048)
#define RX_DASH_FROM_FW_OWN BIT_15
#define TX_DASH_SEND_FW_OWN BIT_15
#define TXS_CC3_0 (BIT_0|BIT_1|BIT_2|BIT_3)
#define TXS_EXC BIT_4
#define TXS_LNKF BIT_5
#define TXS_OWC BIT_6
#define TXS_TES BIT_7
#define TXS_UNF BIT_9
#define TXS_LGSEN BIT_11
#define TXS_LS BIT_12
#define TXS_FS BIT_13
#define TXS_EOR BIT_14
#define TXS_OWN BIT_15
#define TPPool_HRDY 0x20
#define HostReqReg (0xC0)
#define SystemMasterDescStartAddrLow (0xF0)
#define SystemMasterDescStartAddrHigh (0xF4)
#define SystemSlaveDescStartAddrLow (0xF8)
#define SystemSlaveDescStartAddrHigh (0xFC)
//DASH Request Type
#define WSMANREG 0x01
#define OSPUSHDATA 0x02
#define RXS_OWN BIT_15
#define RXS_EOR BIT_14
#define RXS_FS BIT_13
#define RXS_LS BIT_12
#define ISRIMR_DP_DASH_OK BIT_15
#define ISRIMR_DP_HOST_OK BIT_13
#define ISRIMR_DP_REQSYS_OK BIT_11
#define ISRIMR_DASH_INTR_EN BIT_12
#define ISRIMR_DASH_INTR_CMAC_RESET BIT_15
#define ISRIMR_DASH_TYPE2_ROK BIT_0
#define ISRIMR_DASH_TYPE2_RDU BIT_1
#define ISRIMR_DASH_TYPE2_TOK BIT_2
#define ISRIMR_DASH_TYPE2_TDU BIT_3
#define ISRIMR_DASH_TYPE2_TX_FIFO_FULL BIT_4
#define ISRIMR_DASH_TYPE2_TX_DISABLE_IDLE BIT_5
#define ISRIMR_DASH_TYPE2_RX_DISABLE_IDLE BIT_6
#define CMAC_OOB_STOP 0x25
#define CMAC_OOB_INIT 0x26
#define CMAC_OOB_RESET 0x2a
#define NO_BASE_ADDRESS 0x00000000
#define RTL8168FP_OOBMAC_BASE 0xBAF70000
#define RTL8168FP_CMAC_IOBASE 0xBAF20000
#define RTL8168FP_KVM_BASE 0xBAF80400
#define CMAC_SYNC_REG 0x20
#define CMAC_RXDESC_OFFSET 0x90 //RX: 0x90 - 0x98
#define CMAC_TXDESC_OFFSET 0x98 //TX: 0x98 - 0x9F
/* cmac write/read MMIO register */
#define RTL_CMAC_W8(tp, reg, val8) writeb ((val8), tp->cmac_ioaddr + (reg))
#define RTL_CMAC_W16(tp, reg, val16) writew ((val16), tp->cmac_ioaddr + (reg))
#define RTL_CMAC_W32(tp, reg, val32) writel ((val32), tp->cmac_ioaddr + (reg))
#define RTL_CMAC_R8(tp, reg) readb (tp->cmac_ioaddr + (reg))
#define RTL_CMAC_R16(tp, reg) readw (tp->cmac_ioaddr + (reg))
#define RTL_CMAC_R32(tp, reg) ((unsigned long) readl (tp->cmac_ioaddr + (reg)))
int rtl8168_dash_ioctl(struct net_device *dev, struct ifreq *ifr);
void HandleDashInterrupt(struct net_device *dev);
int AllocateDashShareMemory(struct net_device *dev);
void FreeAllocatedDashShareMemory(struct net_device *dev);
void DashHwInit(struct net_device *dev);
#endif /* _LINUX_R8168_DASH_H */

View File

@ -1,75 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
################################################################################
#
# r8168 is the Linux device driver released for Realtek Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
*/
/************************************************************************************
* This product is covered by one or more of the following patents:
* US6,570,884, US6,115,776, and US6,327,625.
***********************************************************************************/
#ifndef _LINUX_R8168_FIBER_H
#define _LINUX_R8168_FIBER_H
enum {
FIBER_MODE_NIC_ONLY = 0,
FIBER_MODE_RTL8168H_RTL8211FS,
FIBER_MODE_RTL8168H_MDI_SWITCH_RTL8211FS,
FIBER_MODE_MAX
};
enum {
FIBER_STAT_NOT_CHECKED = 0,
FIBER_STAT_CONNECT_EEPROM,
FIBER_STAT_DISCONNECT,
FIBER_STAT_CONNECT_GPO,
FIBER_STAT_MAX
};
enum {
FIBER_LED_MODE_DEFAULT = 0,
FIBER_LED_MODE_1,
FIBER_LED_MODE_MAX
};
#define HW_FIBER_MODE_ENABLED(_M) ((_M)->HwFiberModeVer > 0)
#define HW_FIBER_STATUS_CONNECTED(_M) (((_M)->HwFiberStat == FIBER_STAT_CONNECT_EEPROM) || ((_M)->HwFiberStat == FIBER_STAT_CONNECT_GPO))
#define HW_FIBER_STATUS_DISCONNECTED(_M) ((_M)->HwFiberStat == FIBER_STAT_DISCONNECT)
struct rtl8168_private;
void rtl8168_hw_init_fiber_nic(struct rtl8168_private *tp);
void rtl8168_hw_fiber_nic_d3_para(struct rtl8168_private *tp);
void rtl8168_hw_fiber_phy_config(struct rtl8168_private *tp);
void rtl8168_hw_switch_mdi_to_fiber(struct rtl8168_private *tp);
void rtl8168_hw_switch_mdi_to_nic(struct rtl8168_private *tp);
unsigned int rtl8168_hw_fiber_link_ok(struct rtl8168_private *tp);
void rtl8168_check_fiber_link_status(struct rtl8168_private *tp);
void rtl8168_check_hw_fiber_mode_support(struct rtl8168_private *tp);
void rtl8168_set_fiber_mode_software_variable(struct rtl8168_private *tp);
#endif /* _LINUX_R8168_FIBER_H */

View File

@ -1,264 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
################################################################################
#
# r8168 is the Linux device driver released for Realtek Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
*/
/************************************************************************************
* This product is covered by one or more of the following patents:
* US6,570,884, US6,115,776, and US6,327,625.
***********************************************************************************/
#include <linux/version.h>
#include <linux/delay.h>
#include <linux/firmware.h>
#include "r8168_firmware.h"
enum rtl_fw_opcode {
PHY_READ = 0x0,
PHY_DATA_OR = 0x1,
PHY_DATA_AND = 0x2,
PHY_BJMPN = 0x3,
PHY_MDIO_CHG = 0x4,
PHY_CLEAR_READCOUNT = 0x7,
PHY_WRITE = 0x8,
PHY_READCOUNT_EQ_SKIP = 0x9,
PHY_COMP_EQ_SKIPN = 0xa,
PHY_COMP_NEQ_SKIPN = 0xb,
PHY_WRITE_PREVIOUS = 0xc,
PHY_SKIPN = 0xd,
PHY_DELAY_MS = 0xe,
};
struct fw_info {
u32 magic;
char version[RTL8168_VER_SIZE];
__le32 fw_start;
__le32 fw_len;
u8 chksum;
} __packed;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,16,0)
#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
#endif
#define FW_OPCODE_SIZE sizeof_field(struct rtl8168_fw_phy_action, code[0])
static bool rtl8168_fw_format_ok(struct rtl8168_fw *rtl_fw)
{
const struct firmware *fw = rtl_fw->fw;
struct fw_info *fw_info = (struct fw_info *)fw->data;
struct rtl8168_fw_phy_action *pa = &rtl_fw->phy_action;
if (fw->size < FW_OPCODE_SIZE)
return false;
if (!fw_info->magic) {
size_t i, size, start;
u8 checksum = 0;
if (fw->size < sizeof(*fw_info))
return false;
for (i = 0; i < fw->size; i++)
checksum += fw->data[i];
if (checksum != 0)
return false;
start = le32_to_cpu(fw_info->fw_start);
if (start > fw->size)
return false;
size = le32_to_cpu(fw_info->fw_len);
if (size > (fw->size - start) / FW_OPCODE_SIZE)
return false;
strscpy(rtl_fw->version, fw_info->version, RTL8168_VER_SIZE);
pa->code = (__le32 *)(fw->data + start);
pa->size = size;
} else {
if (fw->size % FW_OPCODE_SIZE)
return false;
strscpy(rtl_fw->version, rtl_fw->fw_name, RTL8168_VER_SIZE);
pa->code = (__le32 *)fw->data;
pa->size = fw->size / FW_OPCODE_SIZE;
}
return true;
}
static bool rtl8168_fw_data_ok(struct rtl8168_fw *rtl_fw)
{
struct rtl8168_fw_phy_action *pa = &rtl_fw->phy_action;
size_t index;
for (index = 0; index < pa->size; index++) {
u32 action = le32_to_cpu(pa->code[index]);
u32 val = action & 0x0000ffff;
u32 regno = (action & 0x0fff0000) >> 16;
switch (action >> 28) {
case PHY_READ:
case PHY_DATA_OR:
case PHY_DATA_AND:
case PHY_CLEAR_READCOUNT:
case PHY_WRITE:
case PHY_WRITE_PREVIOUS:
case PHY_DELAY_MS:
break;
case PHY_MDIO_CHG:
if (val > 1)
goto out;
break;
case PHY_BJMPN:
if (regno > index)
goto out;
break;
case PHY_READCOUNT_EQ_SKIP:
if (index + 2 >= pa->size)
goto out;
break;
case PHY_COMP_EQ_SKIPN:
case PHY_COMP_NEQ_SKIPN:
case PHY_SKIPN:
if (index + 1 + regno >= pa->size)
goto out;
break;
default:
dev_err(rtl_fw->dev, "Invalid action 0x%08x\n", action);
return false;
}
}
return true;
out:
dev_err(rtl_fw->dev, "Out of range of firmware\n");
return false;
}
void rtl8168_fw_write_firmware(struct rtl8168_private *tp, struct rtl8168_fw *rtl_fw)
{
struct rtl8168_fw_phy_action *pa = &rtl_fw->phy_action;
rtl8168_fw_write_t fw_write = rtl_fw->phy_write;
rtl8168_fw_read_t fw_read = rtl_fw->phy_read;
int predata = 0, count = 0;
size_t index;
for (index = 0; index < pa->size; index++) {
u32 action = le32_to_cpu(pa->code[index]);
u32 data = action & 0x0000ffff;
u32 regno = (action & 0x0fff0000) >> 16;
enum rtl_fw_opcode opcode = action >> 28;
if (!action)
break;
switch (opcode) {
case PHY_READ:
predata = fw_read(tp, regno);
count++;
break;
case PHY_DATA_OR:
predata |= data;
break;
case PHY_DATA_AND:
predata &= data;
break;
case PHY_BJMPN:
index -= (regno + 1);
break;
case PHY_MDIO_CHG:
if (data) {
fw_write = rtl_fw->mac_mcu_write;
fw_read = rtl_fw->mac_mcu_read;
} else {
fw_write = rtl_fw->phy_write;
fw_read = rtl_fw->phy_read;
}
break;
case PHY_CLEAR_READCOUNT:
count = 0;
break;
case PHY_WRITE:
fw_write(tp, regno, data);
break;
case PHY_READCOUNT_EQ_SKIP:
if (count == data)
index++;
break;
case PHY_COMP_EQ_SKIPN:
if (predata == data)
index += regno;
break;
case PHY_COMP_NEQ_SKIPN:
if (predata != data)
index += regno;
break;
case PHY_WRITE_PREVIOUS:
fw_write(tp, regno, predata);
break;
case PHY_SKIPN:
index += regno;
break;
case PHY_DELAY_MS:
mdelay(data);
break;
}
}
}
void rtl8168_fw_release_firmware(struct rtl8168_fw *rtl_fw)
{
release_firmware(rtl_fw->fw);
}
int rtl8168_fw_request_firmware(struct rtl8168_fw *rtl_fw)
{
int rc;
rc = request_firmware(&rtl_fw->fw, rtl_fw->fw_name, rtl_fw->dev);
if (rc < 0)
goto out;
if (!rtl8168_fw_format_ok(rtl_fw) || !rtl8168_fw_data_ok(rtl_fw)) {
release_firmware(rtl_fw->fw);
rc = -EINVAL;
goto out;
}
return 0;
out:
dev_err(rtl_fw->dev, "Unable to load firmware %s (%d)\n",
rtl_fw->fw_name, rc);
return rc;
}

View File

@ -1,68 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
################################################################################
#
# r8168 is the Linux device driver released for Realtek 2.5Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
*/
/************************************************************************************
* This product is covered by one or more of the following patents:
* US6,570,884, US6,115,776, and US6,327,625.
***********************************************************************************/
#ifndef _LINUX_RTL8168_FIRMWARE_H
#define _LINUX_RTL8168_FIRMWARE_H
#include <linux/device.h>
#include <linux/firmware.h>
struct rtl8168_private;
typedef void (*rtl8168_fw_write_t)(struct rtl8168_private *tp, u16 reg, u16 val);
typedef u32 (*rtl8168_fw_read_t)(struct rtl8168_private *tp, u16 reg);
#define RTL8168_VER_SIZE 32
struct rtl8168_fw {
rtl8168_fw_write_t phy_write;
rtl8168_fw_read_t phy_read;
rtl8168_fw_write_t mac_mcu_write;
rtl8168_fw_read_t mac_mcu_read;
const struct firmware *fw;
const char *fw_name;
struct device *dev;
char version[RTL8168_VER_SIZE];
struct rtl8168_fw_phy_action {
__le32 *code;
size_t size;
} phy_action;
};
int rtl8168_fw_request_firmware(struct rtl8168_fw *rtl_fw);
void rtl8168_fw_release_firmware(struct rtl8168_fw *rtl_fw);
void rtl8168_fw_write_firmware(struct rtl8168_private *tp, struct rtl8168_fw *rtl_fw);
#endif /* _LINUX_RTL8168_FIRMWARE_H */

File diff suppressed because it is too large Load Diff

View File

@ -1,118 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
################################################################################
#
# r8168 is the Linux device driver released for Realtek Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
*/
/************************************************************************************
* This product is covered by one or more of the following patents:
* US6,570,884, US6,115,776, and US6,327,625.
***********************************************************************************/
#ifndef _LINUX_R8168_REALWOW_H
#define _LINUX_R8168_REALWOW_H
#define SIOCDEVPRIVATE_RTLREALWOW SIOCDEVPRIVATE+3
#define MAX_RealWoW_KCP_SIZE (100)
#define MAX_RealWoW_Payload (64)
#define KA_TX_PACKET_SIZE (100)
#define KA_WAKEUP_PATTERN_SIZE (120)
//HwSuppKeepAliveOffloadVer
#define HW_SUPPORT_KCP_OFFLOAD(_M) ((_M)->HwSuppKCPOffloadVer > 0)
enum rtl_realwow_cmd {
RTL_REALWOW_SET_KCP_DISABLE=0,
RTL_REALWOW_SET_KCP_INFO,
RTL_REALWOW_SET_KCP_CONTENT,
RTL_REALWOW_SET_KCP_ACKPKTINFO,
RTL_REALWOW_SET_KCP_WPINFO,
RTL_REALWOW_SET_KCPDHCP_TIMEOUT,
RTLT_REALWOW_COMMAND_INVALID
};
struct rtl_realwow_ioctl_struct {
__u32 cmd;
__u32 offset;
__u32 len;
union {
__u32 data;
void *data_buffer;
};
};
typedef struct _MP_KCPInfo {
u8 DIPv4[4];
u8 MacID[6];
u16 UdpPort[2];
u8 PKTLEN[2];
u16 ackLostCnt;
u8 KCP_WakePattern[MAX_RealWoW_Payload];
u8 KCP_AckPacket[MAX_RealWoW_Payload];
u32 KCP_interval;
u8 KCP_WakePattern_Len;
u8 KCP_AckPacket_Len;
u8 KCP_TxPacket[2][KA_TX_PACKET_SIZE];
} MP_KCP_INFO, *PMP_KCP_INFO;
typedef struct _KCPInfo {
u32 nId; // = id
u8 DIPv4[4];
u8 MacID[6];
u16 UdpPort;
u16 PKTLEN;
} KCPInfo, *PKCPInfo;
typedef struct _KCPContent {
u32 id; // = id
u32 mSec; // = msec
u32 size; // =size
u8 bPacket[MAX_RealWoW_KCP_SIZE]; // put packet here
} KCPContent, *PKCPContent;
typedef struct _RealWoWAckPktInfo {
u16 ackLostCnt;
u16 patterntSize;
u8 pattern[MAX_RealWoW_Payload];
} RealWoWAckPktInfo,*PRealWoWAckPktInfo;
typedef struct _RealWoWWPInfo {
u16 patterntSize;
u8 pattern[MAX_RealWoW_Payload];
} RealWoWWPInfo,*PRealWoWWPInfo;
int rtl8168_realwow_ioctl(struct net_device *dev, struct ifreq *ifr);
void rtl8168_realwow_hw_init(struct net_device *dev);
void rtl8168_get_realwow_hw_version(struct net_device *dev);
void rtl8168_set_realwow_d3_para(struct net_device *dev);
#endif /* _LINUX_R8168_REALWOW_H */

View File

@ -1,289 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
################################################################################
#
# r8168 is the Linux device driver released for Realtek Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
*/
/************************************************************************************
* This product is covered by one or more of the following patents:
* US6,570,884, US6,115,776, and US6,327,625.
***********************************************************************************/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <asm/io.h>
#include "r8168.h"
#include "rtl_eeprom.h"
//-------------------------------------------------------------------
//rtl8168_eeprom_type():
// tell the eeprom type
//return value:
// 0: the eeprom type is 93C46
// 1: the eeprom type is 93C56 or 93C66
//-------------------------------------------------------------------
void rtl8168_eeprom_type(struct rtl8168_private *tp)
{
u16 magic = 0;
if (tp->mcfg == CFG_METHOD_DEFAULT)
goto out_no_eeprom;
if(RTL_R8(tp, 0xD2)&0x04) {
//not support
//tp->eeprom_type = EEPROM_TWSI;
//tp->eeprom_len = 256;
goto out_no_eeprom;
} else if(RTL_R32(tp, RxConfig) & RxCfg_9356SEL) {
tp->eeprom_type = EEPROM_TYPE_93C56;
tp->eeprom_len = 256;
} else {
tp->eeprom_type = EEPROM_TYPE_93C46;
tp->eeprom_len = 128;
}
magic = rtl8168_eeprom_read_sc(tp, 0);
out_no_eeprom:
if ((magic != 0x8129) && (magic != 0x8128)) {
tp->eeprom_type = EEPROM_TYPE_NONE;
tp->eeprom_len = 0;
}
}
void rtl8168_eeprom_cleanup(struct rtl8168_private *tp)
{
u8 x;
x = RTL_R8(tp, Cfg9346);
x &= ~(Cfg9346_EEDI | Cfg9346_EECS);
RTL_W8(tp, Cfg9346, x);
rtl8168_raise_clock(tp, &x);
rtl8168_lower_clock(tp, &x);
}
int rtl8168_eeprom_cmd_done(struct rtl8168_private *tp)
{
u8 x;
int i;
rtl8168_stand_by(tp);
for (i = 0; i < 50000; i++) {
x = RTL_R8(tp, Cfg9346);
if (x & Cfg9346_EEDO) {
udelay(RTL_CLOCK_RATE * 2 * 3);
return 0;
}
udelay(1);
}
return -1;
}
//-------------------------------------------------------------------
//rtl8168_eeprom_read_sc():
// read one word from eeprom
//-------------------------------------------------------------------
u16 rtl8168_eeprom_read_sc(struct rtl8168_private *tp, u16 reg)
{
int addr_sz = 6;
u8 x;
u16 data;
if(tp->eeprom_type == EEPROM_TYPE_NONE) {
return -1;
}
if (tp->eeprom_type==EEPROM_TYPE_93C46)
addr_sz = 6;
else if (tp->eeprom_type==EEPROM_TYPE_93C56)
addr_sz = 8;
x = Cfg9346_EEM1 | Cfg9346_EECS;
RTL_W8(tp, Cfg9346, x);
rtl8168_shift_out_bits(tp, RTL_EEPROM_READ_OPCODE, 3);
rtl8168_shift_out_bits(tp, reg, addr_sz);
data = rtl8168_shift_in_bits(tp);
rtl8168_eeprom_cleanup(tp);
RTL_W8(tp, Cfg9346, 0);
return data;
}
//-------------------------------------------------------------------
//rtl8168_eeprom_write_sc():
// write one word to a specific address in the eeprom
//-------------------------------------------------------------------
void rtl8168_eeprom_write_sc(struct rtl8168_private *tp, u16 reg, u16 data)
{
u8 x;
int addr_sz = 6;
int w_dummy_addr = 4;
if(tp->eeprom_type == EEPROM_TYPE_NONE) {
return ;
}
if (tp->eeprom_type==EEPROM_TYPE_93C46) {
addr_sz = 6;
w_dummy_addr = 4;
} else if (tp->eeprom_type==EEPROM_TYPE_93C56) {
addr_sz = 8;
w_dummy_addr = 6;
}
x = Cfg9346_EEM1 | Cfg9346_EECS;
RTL_W8(tp, Cfg9346, x);
rtl8168_shift_out_bits(tp, RTL_EEPROM_EWEN_OPCODE, 5);
rtl8168_shift_out_bits(tp, reg, w_dummy_addr);
rtl8168_stand_by(tp);
rtl8168_shift_out_bits(tp, RTL_EEPROM_ERASE_OPCODE, 3);
rtl8168_shift_out_bits(tp, reg, addr_sz);
if (rtl8168_eeprom_cmd_done(tp) < 0) {
return;
}
rtl8168_stand_by(tp);
rtl8168_shift_out_bits(tp, RTL_EEPROM_WRITE_OPCODE, 3);
rtl8168_shift_out_bits(tp, reg, addr_sz);
rtl8168_shift_out_bits(tp, data, 16);
if (rtl8168_eeprom_cmd_done(tp) < 0) {
return;
}
rtl8168_stand_by(tp);
rtl8168_shift_out_bits(tp, RTL_EEPROM_EWDS_OPCODE, 5);
rtl8168_shift_out_bits(tp, reg, w_dummy_addr);
rtl8168_eeprom_cleanup(tp);
RTL_W8(tp, Cfg9346, 0);
}
void rtl8168_raise_clock(struct rtl8168_private *tp, u8 *x)
{
*x = *x | Cfg9346_EESK;
RTL_W8(tp, Cfg9346, *x);
udelay(RTL_CLOCK_RATE);
}
void rtl8168_lower_clock(struct rtl8168_private *tp, u8 *x)
{
*x = *x & ~Cfg9346_EESK;
RTL_W8(tp, Cfg9346, *x);
udelay(RTL_CLOCK_RATE);
}
void rtl8168_shift_out_bits(struct rtl8168_private *tp, int data, int count)
{
u8 x;
int mask;
mask = 0x01 << (count - 1);
x = RTL_R8(tp, Cfg9346);
x &= ~(Cfg9346_EEDI | Cfg9346_EEDO);
do {
if (data & mask)
x |= Cfg9346_EEDI;
else
x &= ~Cfg9346_EEDI;
RTL_W8(tp, Cfg9346, x);
udelay(RTL_CLOCK_RATE);
rtl8168_raise_clock(tp, &x);
rtl8168_lower_clock(tp, &x);
mask = mask >> 1;
} while(mask);
x &= ~Cfg9346_EEDI;
RTL_W8(tp, Cfg9346, x);
}
u16 rtl8168_shift_in_bits(struct rtl8168_private *tp)
{
u8 x;
u16 d, i;
x = RTL_R8(tp, Cfg9346);
x &= ~(Cfg9346_EEDI | Cfg9346_EEDO);
d = 0;
for (i = 0; i < 16; i++) {
d = d << 1;
rtl8168_raise_clock(tp, &x);
x = RTL_R8(tp, Cfg9346);
x &= ~Cfg9346_EEDI;
if (x & Cfg9346_EEDO)
d |= 1;
rtl8168_lower_clock(tp, &x);
}
return d;
}
void rtl8168_stand_by(struct rtl8168_private *tp)
{
u8 x;
x = RTL_R8(tp, Cfg9346);
x &= ~(Cfg9346_EECS | Cfg9346_EESK);
RTL_W8(tp, Cfg9346, x);
udelay(RTL_CLOCK_RATE);
x |= Cfg9346_EECS;
RTL_W8(tp, Cfg9346, x);
}
void rtl8168_set_eeprom_sel_low(struct rtl8168_private *tp)
{
RTL_W8(tp, Cfg9346, Cfg9346_EEM1);
RTL_W8(tp, Cfg9346, Cfg9346_EEM1 | Cfg9346_EESK);
udelay(20);
RTL_W8(tp, Cfg9346, Cfg9346_EEM1);
}

View File

@ -1,56 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
################################################################################
#
# r8168 is the Linux device driver released for Realtek Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
*/
/************************************************************************************
* This product is covered by one or more of the following patents:
* US6,570,884, US6,115,776, and US6,327,625.
***********************************************************************************/
//EEPROM opcodes
#define RTL_EEPROM_READ_OPCODE 06
#define RTL_EEPROM_WRITE_OPCODE 05
#define RTL_EEPROM_ERASE_OPCODE 07
#define RTL_EEPROM_EWEN_OPCODE 19
#define RTL_EEPROM_EWDS_OPCODE 16
#define RTL_CLOCK_RATE 3
void rtl8168_eeprom_type(struct rtl8168_private *tp);
void rtl8168_eeprom_cleanup(struct rtl8168_private *tp);
u16 rtl8168_eeprom_read_sc(struct rtl8168_private *tp, u16 reg);
void rtl8168_eeprom_write_sc(struct rtl8168_private *tp, u16 reg, u16 data);
void rtl8168_shift_out_bits(struct rtl8168_private *tp, int data, int count);
u16 rtl8168_shift_in_bits(struct rtl8168_private *tp);
void rtl8168_raise_clock(struct rtl8168_private *tp, u8 *x);
void rtl8168_lower_clock(struct rtl8168_private *tp, u8 *x);
void rtl8168_stand_by(struct rtl8168_private *tp);
void rtl8168_set_eeprom_sel_low(struct rtl8168_private *tp);

View File

@ -1,299 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
################################################################################
#
# r8168 is the Linux device driver released for Realtek Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
*/
/************************************************************************************
* This product is covered by one or more of the following patents:
* US6,570,884, US6,115,776, and US6,327,625.
***********************************************************************************/
#include <linux/module.h>
#include <linux/version.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/in.h>
#include <linux/ethtool.h>
#include <asm/uaccess.h>
#include "r8168.h"
#include "rtl_eeprom.h"
#include "rtltool.h"
int rtl8168_tool_ioctl(struct rtl8168_private *tp, struct ifreq *ifr)
{
struct rtltool_cmd my_cmd;
unsigned long flags;
int ret;
if (copy_from_user(&my_cmd, ifr->ifr_data, sizeof(my_cmd)))
return -EFAULT;
ret = 0;
switch (my_cmd.cmd) {
case RTLTOOL_READ_MAC:
if (my_cmd.len==1)
my_cmd.data = readb(tp->mmio_addr+my_cmd.offset);
else if (my_cmd.len==2)
my_cmd.data = readw(tp->mmio_addr+(my_cmd.offset&~1));
else if (my_cmd.len==4)
my_cmd.data = readl(tp->mmio_addr+(my_cmd.offset&~3));
else {
ret = -EOPNOTSUPP;
break;
}
if (copy_to_user(ifr->ifr_data, &my_cmd, sizeof(my_cmd))) {
ret = -EFAULT;
break;
}
break;
case RTLTOOL_WRITE_MAC:
if (my_cmd.len==1)
writeb(my_cmd.data, tp->mmio_addr+my_cmd.offset);
else if (my_cmd.len==2)
writew(my_cmd.data, tp->mmio_addr+(my_cmd.offset&~1));
else if (my_cmd.len==4)
writel(my_cmd.data, tp->mmio_addr+(my_cmd.offset&~3));
else {
ret = -EOPNOTSUPP;
break;
}
break;
case RTLTOOL_READ_PHY:
spin_lock_irqsave(&tp->lock, flags);
my_cmd.data = rtl8168_mdio_prot_read(tp, my_cmd.offset);
spin_unlock_irqrestore(&tp->lock, flags);
if (copy_to_user(ifr->ifr_data, &my_cmd, sizeof(my_cmd))) {
ret = -EFAULT;
break;
}
break;
case RTLTOOL_WRITE_PHY:
spin_lock_irqsave(&tp->lock, flags);
rtl8168_mdio_prot_write(tp, my_cmd.offset, my_cmd.data);
spin_unlock_irqrestore(&tp->lock, flags);
break;
case RTLTOOL_READ_EPHY:
spin_lock_irqsave(&tp->lock, flags);
my_cmd.data = rtl8168_ephy_read(tp, my_cmd.offset);
spin_unlock_irqrestore(&tp->lock, flags);
if (copy_to_user(ifr->ifr_data, &my_cmd, sizeof(my_cmd))) {
ret = -EFAULT;
break;
}
break;
case RTLTOOL_WRITE_EPHY:
spin_lock_irqsave(&tp->lock, flags);
rtl8168_ephy_write(tp, my_cmd.offset, my_cmd.data);
spin_unlock_irqrestore(&tp->lock, flags);
break;
case RTLTOOL_READ_ERI:
my_cmd.data = 0;
if (my_cmd.len==1 || my_cmd.len==2 || my_cmd.len==4) {
spin_lock_irqsave(&tp->lock, flags);
my_cmd.data = rtl8168_eri_read(tp, my_cmd.offset, my_cmd.len, ERIAR_ExGMAC);
spin_unlock_irqrestore(&tp->lock, flags);
} else {
ret = -EOPNOTSUPP;
break;
}
if (copy_to_user(ifr->ifr_data, &my_cmd, sizeof(my_cmd))) {
ret = -EFAULT;
break;
}
break;
case RTLTOOL_WRITE_ERI:
if (my_cmd.len==1 || my_cmd.len==2 || my_cmd.len==4) {
spin_lock_irqsave(&tp->lock, flags);
rtl8168_eri_write(tp, my_cmd.offset, my_cmd.len, my_cmd.data, ERIAR_ExGMAC);
spin_unlock_irqrestore(&tp->lock, flags);
} else {
ret = -EOPNOTSUPP;
break;
}
break;
case RTLTOOL_READ_PCI:
my_cmd.data = 0;
if (my_cmd.len==1)
pci_read_config_byte(tp->pci_dev, my_cmd.offset,
(u8 *)&my_cmd.data);
else if (my_cmd.len==2)
pci_read_config_word(tp->pci_dev, my_cmd.offset,
(u16 *)&my_cmd.data);
else if (my_cmd.len==4)
pci_read_config_dword(tp->pci_dev, my_cmd.offset,
&my_cmd.data);
else {
ret = -EOPNOTSUPP;
break;
}
if (copy_to_user(ifr->ifr_data, &my_cmd, sizeof(my_cmd))) {
ret = -EFAULT;
break;
}
break;
case RTLTOOL_WRITE_PCI:
if (my_cmd.len==1)
pci_write_config_byte(tp->pci_dev, my_cmd.offset,
my_cmd.data);
else if (my_cmd.len==2)
pci_write_config_word(tp->pci_dev, my_cmd.offset,
my_cmd.data);
else if (my_cmd.len==4)
pci_write_config_dword(tp->pci_dev, my_cmd.offset,
my_cmd.data);
else {
ret = -EOPNOTSUPP;
break;
}
break;
case RTLTOOL_READ_EEPROM:
spin_lock_irqsave(&tp->lock, flags);
my_cmd.data = rtl8168_eeprom_read_sc(tp, my_cmd.offset);
spin_unlock_irqrestore(&tp->lock, flags);
if (copy_to_user(ifr->ifr_data, &my_cmd, sizeof(my_cmd))) {
ret = -EFAULT;
break;
}
break;
case RTLTOOL_WRITE_EEPROM:
spin_lock_irqsave(&tp->lock, flags);
rtl8168_eeprom_write_sc(tp, my_cmd.offset, my_cmd.data);
spin_unlock_irqrestore(&tp->lock, flags);
break;
case RTL_READ_OOB_MAC:
spin_lock_irqsave(&tp->lock, flags);
rtl8168_oob_mutex_lock(tp);
my_cmd.data = rtl8168_ocp_read(tp, my_cmd.offset, 4);
rtl8168_oob_mutex_unlock(tp);
spin_unlock_irqrestore(&tp->lock, flags);
if (copy_to_user(ifr->ifr_data, &my_cmd, sizeof(my_cmd))) {
ret = -EFAULT;
break;
}
break;
case RTL_WRITE_OOB_MAC:
if (my_cmd.len == 0 || my_cmd.len > 4)
return -EOPNOTSUPP;
spin_lock_irqsave(&tp->lock, flags);
rtl8168_oob_mutex_lock(tp);
rtl8168_ocp_write(tp, my_cmd.offset, my_cmd.len, my_cmd.data);
rtl8168_oob_mutex_unlock(tp);
spin_unlock_irqrestore(&tp->lock, flags);
break;
case RTL_ENABLE_PCI_DIAG:
spin_lock_irqsave(&tp->lock, flags);
tp->rtk_enable_diag = 1;
spin_unlock_irqrestore(&tp->lock, flags);
dprintk("enable rtk diag\n");
break;
case RTL_DISABLE_PCI_DIAG:
spin_lock_irqsave(&tp->lock, flags);
tp->rtk_enable_diag = 0;
spin_unlock_irqrestore(&tp->lock, flags);
dprintk("disable rtk diag\n");
break;
case RTL_READ_MAC_OCP:
if (my_cmd.offset % 2)
return -EOPNOTSUPP;
spin_lock_irqsave(&tp->lock, flags);
my_cmd.data = rtl8168_mac_ocp_read(tp, my_cmd.offset);
spin_unlock_irqrestore(&tp->lock, flags);
if (copy_to_user(ifr->ifr_data, &my_cmd, sizeof(my_cmd))) {
ret = -EFAULT;
break;
}
break;
case RTL_WRITE_MAC_OCP:
if ((my_cmd.offset % 2) || (my_cmd.len != 2))
return -EOPNOTSUPP;
spin_lock_irqsave(&tp->lock, flags);
rtl8168_mac_ocp_write(tp, my_cmd.offset, (u16)my_cmd.data);
spin_unlock_irqrestore(&tp->lock, flags);
break;
case RTL_DIRECT_READ_PHY_OCP:
spin_lock_irqsave(&tp->lock, flags);
my_cmd.data = rtl8168_mdio_prot_direct_read_phy_ocp(tp, my_cmd.offset);
spin_unlock_irqrestore(&tp->lock, flags);
if (copy_to_user(ifr->ifr_data, &my_cmd, sizeof(my_cmd))) {
ret = -EFAULT;
break;
}
break;
case RTL_DIRECT_WRITE_PHY_OCP:
spin_lock_irqsave(&tp->lock, flags);
rtl8168_mdio_prot_direct_write_phy_ocp(tp, my_cmd.offset, my_cmd.data);
spin_unlock_irqrestore(&tp->lock, flags);
break;
default:
ret = -EOPNOTSUPP;
break;
}
return ret;
}

View File

@ -1,86 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
################################################################################
#
# r8168 is the Linux device driver released for Realtek Gigabit Ethernet
# controllers with PCI-Express interface.
#
# Copyright(c) 2022 Realtek Semiconductor Corp. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
#
# Author:
# Realtek NIC software team <nicfae@realtek.com>
# No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
#
################################################################################
*/
/************************************************************************************
* This product is covered by one or more of the following patents:
* US6,570,884, US6,115,776, and US6,327,625.
***********************************************************************************/
#ifndef _LINUX_RTLTOOL_H
#define _LINUX_RTLTOOL_H
#define SIOCRTLTOOL SIOCDEVPRIVATE+1
enum rtl_cmd {
RTLTOOL_READ_MAC=0,
RTLTOOL_WRITE_MAC,
RTLTOOL_READ_PHY,
RTLTOOL_WRITE_PHY,
RTLTOOL_READ_EPHY,
RTLTOOL_WRITE_EPHY,
RTLTOOL_READ_ERI,
RTLTOOL_WRITE_ERI,
RTLTOOL_READ_PCI,
RTLTOOL_WRITE_PCI,
RTLTOOL_READ_EEPROM,
RTLTOOL_WRITE_EEPROM,
RTL_READ_OOB_MAC,
RTL_WRITE_OOB_MAC,
RTL_ENABLE_PCI_DIAG,
RTL_DISABLE_PCI_DIAG,
RTL_READ_MAC_OCP,
RTL_WRITE_MAC_OCP,
RTL_DIRECT_READ_PHY_OCP,
RTL_DIRECT_WRITE_PHY_OCP,
RTLTOOL_INVALID
};
struct rtltool_cmd {
__u32 cmd;
__u32 offset;
__u32 len;
__u32 data;
};
enum mode_access {
MODE_NONE=0,
MODE_READ,
MODE_WRITE
};
#ifdef __KERNEL__
int rtl8168_tool_ioctl(struct rtl8168_private *tp, struct ifreq *ifr);
#endif
#endif /* _LINUX_RTLTOOL_H */

View File

@ -0,0 +1,75 @@
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1386,16 +1386,6 @@ config BOOT_CONFIG_EMBED_FILE
This bootconfig will be used if there is no initrd or no other
bootconfig in the initrd.
-config INITRAMFS_PRESERVE_MTIME
- bool "Preserve cpio archive mtimes in initramfs"
- default y
- help
- Each entry in an initramfs cpio archive carries an mtime value. When
- enabled, extracted cpio items take this mtime, with directory mtime
- setting deferred until after creation of any child entries.
-
- If unsure, say Y.
-
choice
prompt "Compiler optimization level"
default CC_OPTIMIZE_FOR_PERFORMANCE
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -127,17 +127,15 @@ static void __init free_hash(void)
}
}
-#ifdef CONFIG_INITRAMFS_PRESERVE_MTIME
-static void __init do_utime(char *filename, time64_t mtime)
+static long __init do_utime(char *filename, time64_t mtime)
{
- struct timespec64 t[2] = { { .tv_sec = mtime }, { .tv_sec = mtime } };
- init_utimes(filename, t);
-}
+ struct timespec64 t[2];
-static void __init do_utime_path(const struct path *path, time64_t mtime)
-{
- struct timespec64 t[2] = { { .tv_sec = mtime }, { .tv_sec = mtime } };
- vfs_utimes(path, t);
+ t[0].tv_sec = mtime;
+ t[0].tv_nsec = 0;
+ t[1].tv_sec = mtime;
+ t[1].tv_nsec = 0;
+ return init_utimes(filename, t);
}
static __initdata LIST_HEAD(dir_list);
@@ -170,12 +168,6 @@ static void __init dir_utime(void)
kfree(de);
}
}
-#else
-static void __init do_utime(char *filename, time64_t mtime) {}
-static void __init do_utime_path(const struct path *path, time64_t mtime) {}
-static void __init dir_add(const char *name, time64_t mtime) {}
-static void __init dir_utime(void) {}
-#endif
static __initdata time64_t mtime;
@@ -407,10 +399,14 @@ static int __init do_name(void)
static int __init do_copy(void)
{
if (byte_count >= body_len) {
+ struct timespec64 t[2] = { };
if (xwrite(wfile, victim, body_len, &wfile_pos) != body_len)
error("write error");
- do_utime_path(&wfile->f_path, mtime);
+ t[0].tv_sec = mtime;
+ t[1].tv_sec = mtime;
+ vfs_utimes(&wfile->f_path, t);
+
fput(wfile);
if (csum_present && io_csum != hdr_csum)
error("bad data checksum");

View File

@ -0,0 +1,122 @@
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2133,8 +2133,6 @@ struct net_device {
/* Protocol-specific pointers */
- struct in_device __rcu *ip_ptr;
- struct inet6_dev __rcu *ip6_ptr;
#if IS_ENABLED(CONFIG_VLAN_8021Q)
struct vlan_info __rcu *vlan_info;
#endif
@@ -2147,18 +2145,16 @@ struct net_device {
#if IS_ENABLED(CONFIG_ATALK)
void *atalk_ptr;
#endif
+ struct in_device __rcu *ip_ptr;
#if IS_ENABLED(CONFIG_DECNET)
struct dn_dev __rcu *dn_ptr;
#endif
+ struct inet6_dev __rcu *ip6_ptr;
#if IS_ENABLED(CONFIG_AX25)
void *ax25_ptr;
#endif
-#if IS_ENABLED(CONFIG_CFG80211)
struct wireless_dev *ieee80211_ptr;
-#endif
-#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN)
struct wpan_dev *ieee802154_ptr;
-#endif
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
struct mpls_dev __rcu *mpls_ptr;
#endif
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -8379,9 +8379,7 @@ int cfg80211_register_netdevice(struct n
*/
static inline void cfg80211_unregister_netdevice(struct net_device *dev)
{
-#if IS_ENABLED(CONFIG_CFG80211)
cfg80211_unregister_wdev(dev->ieee80211_ptr);
-#endif
}
/**
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -373,7 +373,6 @@ struct wpan_dev {
#define to_phy(_dev) container_of(_dev, struct wpan_phy, dev)
-#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN)
static inline int
wpan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
const struct ieee802154_addr *daddr,
@@ -384,7 +383,6 @@ wpan_dev_hard_header(struct sk_buff *skb
return wpan_dev->header_ops->create(skb, dev, daddr, saddr, len);
}
-#endif
struct wpan_phy *
wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size);
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -308,11 +308,9 @@ static bool batadv_is_cfg80211_netdev(st
if (!net_device)
return false;
-#if IS_ENABLED(CONFIG_CFG80211)
/* cfg80211 drivers have to set ieee80211_ptr */
if (net_device->ieee80211_ptr)
return true;
-#endif
return false;
}
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -747,6 +747,7 @@ static const struct attribute_group nets
.attrs = netstat_attrs,
};
+#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
static struct attribute *wireless_attrs[] = {
NULL
};
@@ -755,19 +756,7 @@ static const struct attribute_group wire
.name = "wireless",
.attrs = wireless_attrs,
};
-
-static bool wireless_group_needed(struct net_device *ndev)
-{
-#if IS_ENABLED(CONFIG_CFG80211)
- if (ndev->ieee80211_ptr)
- return true;
#endif
-#if IS_ENABLED(CONFIG_WIRELESS_EXT)
- if (ndev->wireless_handlers)
- return true;
-#endif
- return false;
-}
#else /* CONFIG_SYSFS */
#define net_class_groups NULL
@@ -2008,8 +1997,14 @@ int netdev_register_kobject(struct net_d
*groups++ = &netstat_group;
- if (wireless_group_needed(ndev))
+#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
+ if (ndev->ieee80211_ptr)
+ *groups++ = &wireless_group;
+#if IS_ENABLED(CONFIG_WIRELESS_EXT)
+ else if (ndev->wireless_handlers)
*groups++ = &wireless_group;
+#endif
+#endif
#endif /* CONFIG_SYSFS */
error = device_add(dev);

View File

@ -0,0 +1,21 @@
From 173019b66dcc9d68ad9333aa744dad1e369b5aa8 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 9 Jul 2017 00:26:53 +0200
Subject: [PATCH 34/34] kernel: add compile fix for linux 4.9 on x86
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
Makefile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/Makefile
+++ b/Makefile
@@ -537,7 +537,7 @@ KBUILD_LDFLAGS_MODULE :=
KBUILD_LDFLAGS :=
CLANG_FLAGS :=
-export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC HOSTPKG_CONFIG
+export ARCH SRCARCH SUBARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC HOSTPKG_CONFIG
export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD

View File

@ -0,0 +1,143 @@
From e3264035bdac67898d685423ffb2f3a9c3a5964a Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 4 Aug 2021 01:31:34 -0600
Subject: [PATCH 01/14] mm: x86, arm64: add arch_has_hw_pte_young()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Some architectures automatically set the accessed bit in PTEs, e.g.,
x86 and arm64 v8.2. On architectures that do not have this capability,
clearing the accessed bit in a PTE usually triggers a page fault
following the TLB miss of this PTE (to emulate the accessed bit).
Being aware of this capability can help make better decisions, e.g.,
whether to spread the work out over a period of time to reduce bursty
page faults when trying to clear the accessed bit in many PTEs.
Note that theoretically this capability can be unreliable, e.g.,
hotplugged CPUs might be different from builtin ones. Therefore it
should not be used in architecture-independent code that involves
correctness, e.g., to determine whether TLB flushes are required (in
combination with the accessed bit).
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Acked-by: Will Deacon <will@kernel.org>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Change-Id: Ib49b44fb56df3333a2ff1fcc496fb1980b976e7a
---
arch/arm64/include/asm/pgtable.h | 15 ++-------------
arch/x86/include/asm/pgtable.h | 6 +++---
include/linux/pgtable.h | 13 +++++++++++++
mm/memory.c | 14 +-------------
4 files changed, 19 insertions(+), 29 deletions(-)
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1082,24 +1082,13 @@ static inline void update_mmu_cache(stru
* page after fork() + CoW for pfn mappings. We don't always have a
* hardware-managed access flag on arm64.
*/
-static inline bool arch_faults_on_old_pte(void)
-{
- /* The register read below requires a stable CPU to make any sense */
- cant_migrate();
-
- return !cpu_has_hw_af();
-}
-#define arch_faults_on_old_pte arch_faults_on_old_pte
+#define arch_has_hw_pte_young cpu_has_hw_af
/*
* Experimentally, it's cheap to set the access flag in hardware and we
* benefit from prefaulting mappings as 'old' to start with.
*/
-static inline bool arch_wants_old_prefaulted_pte(void)
-{
- return !arch_faults_on_old_pte();
-}
-#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte
+#define arch_wants_old_prefaulted_pte cpu_has_hw_af
static inline bool pud_sect_supported(void)
{
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1431,10 +1431,10 @@ static inline bool arch_has_pfn_modify_c
return boot_cpu_has_bug(X86_BUG_L1TF);
}
-#define arch_faults_on_old_pte arch_faults_on_old_pte
-static inline bool arch_faults_on_old_pte(void)
+#define arch_has_hw_pte_young arch_has_hw_pte_young
+static inline bool arch_has_hw_pte_young(void)
{
- return false;
+ return true;
}
#ifdef CONFIG_PAGE_TABLE_CHECK
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -260,6 +260,19 @@ static inline int pmdp_clear_flush_young
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef arch_has_hw_pte_young
+/*
+ * Return whether the accessed bit is supported on the local CPU.
+ *
+ * This stub assumes accessing through an old PTE triggers a page fault.
+ * Architectures that automatically set the access bit should overwrite it.
+ */
+static inline bool arch_has_hw_pte_young(void)
+{
+ return false;
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address,
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -125,18 +125,6 @@ int randomize_va_space __read_mostly =
2;
#endif
-#ifndef arch_faults_on_old_pte
-static inline bool arch_faults_on_old_pte(void)
-{
- /*
- * Those arches which don't have hw access flag feature need to
- * implement their own helper. By default, "true" means pagefault
- * will be hit on old pte.
- */
- return true;
-}
-#endif
-
#ifndef arch_wants_old_prefaulted_pte
static inline bool arch_wants_old_prefaulted_pte(void)
{
@@ -2872,7 +2860,7 @@ static inline bool __wp_page_copy_user(s
* On architectures with software "accessed" bits, we would
* take a double page fault, so mark it accessed here.
*/
- if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
+ if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) {
pte_t entry;
vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);

View File

@ -0,0 +1,132 @@
From 0c0016e6f53b52166fe4da61c81fa6b27f4650cd Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Sat, 26 Sep 2020 21:17:18 -0600
Subject: [PATCH 02/14] mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Some architectures support the accessed bit in non-leaf PMD entries,
e.g., x86 sets the accessed bit in a non-leaf PMD entry when using it
as part of linear address translation [1]. Page table walkers that
clear the accessed bit may use this capability to reduce their search
space.
Note that:
1. Although an inline function is preferable, this capability is added
as a configuration option for consistency with the existing macros.
2. Due to the little interest in other varieties, this capability was
only tested on Intel and AMD CPUs.
Thanks to the following developers for their efforts [2][3].
Randy Dunlap <rdunlap@infradead.org>
Stephen Rothwell <sfr@canb.auug.org.au>
[1]: Intel 64 and IA-32 Architectures Software Developer's Manual
Volume 3 (June 2021), section 4.8
[2] https://lore.kernel.org/r/bfdcc7c8-922f-61a9-aa15-7e7250f04af7@infradead.org/
[3] https://lore.kernel.org/r/20220413151513.5a0d7a7e@canb.auug.org.au/
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Change-Id: I1a17be3ae926f721f7b17ea1539e5c39e8c4f9a8
---
arch/Kconfig | 8 ++++++++
arch/x86/Kconfig | 1 +
arch/x86/include/asm/pgtable.h | 3 ++-
arch/x86/mm/pgtable.c | 5 ++++-
include/linux/pgtable.h | 4 ++--
5 files changed, 17 insertions(+), 4 deletions(-)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1418,6 +1418,14 @@ config DYNAMIC_SIGFRAME
config HAVE_ARCH_NODE_DEV_GROUP
bool
+config ARCH_HAS_NONLEAF_PMD_YOUNG
+ bool
+ help
+ Architectures that select this option are capable of setting the
+ accessed bit in non-leaf PMD entries when using them as part of linear
+ address translations. Page table walkers that clear the accessed bit
+ may use this capability to reduce their search space.
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -85,6 +85,7 @@ config X86
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
+ select ARCH_HAS_NONLEAF_PMD_YOUNG if PGTABLE_LEVELS > 2
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_COPY_MC if X86_64
select ARCH_HAS_SET_MEMORY
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -815,7 +815,8 @@ static inline unsigned long pmd_page_vad
static inline int pmd_bad(pmd_t pmd)
{
- return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
+ return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) !=
+ (_KERNPG_TABLE & ~_PAGE_ACCESSED);
}
static inline unsigned long pages_to_mb(unsigned long npg)
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_
return ret;
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_
return ret;
}
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pudp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pud_t *pudp)
{
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -213,7 +213,7 @@ static inline int ptep_test_and_clear_yo
#endif
#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pmd_t *pmdp)
@@ -234,7 +234,7 @@ static inline int pmdp_test_and_clear_yo
BUILD_BUG();
return 0;
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH

View File

@ -0,0 +1,254 @@
From d8e0edcddc441574410a047ede56f79c849a6d37 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Sun, 27 Sep 2020 20:49:08 -0600
Subject: [PATCH 03/14] mm/vmscan.c: refactor shrink_node()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This patch refactors shrink_node() to improve readability for the
upcoming changes to mm/vmscan.c.
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Change-Id: Iae734b5b4030205b7db6e8c841f747b6f6ae1a04
---
mm/vmscan.c | 198 +++++++++++++++++++++++++++-------------------------
1 file changed, 104 insertions(+), 94 deletions(-)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2728,6 +2728,109 @@ enum scan_balance {
SCAN_FILE,
};
+static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
+{
+ unsigned long file;
+ struct lruvec *target_lruvec;
+
+ target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
+
+ /*
+ * Flush the memory cgroup stats, so that we read accurate per-memcg
+ * lruvec stats for heuristics.
+ */
+ mem_cgroup_flush_stats();
+
+ /*
+ * Determine the scan balance between anon and file LRUs.
+ */
+ spin_lock_irq(&target_lruvec->lru_lock);
+ sc->anon_cost = target_lruvec->anon_cost;
+ sc->file_cost = target_lruvec->file_cost;
+ spin_unlock_irq(&target_lruvec->lru_lock);
+
+ /*
+ * Target desirable inactive:active list ratios for the anon
+ * and file LRU lists.
+ */
+ if (!sc->force_deactivate) {
+ unsigned long refaults;
+
+ refaults = lruvec_page_state(target_lruvec,
+ WORKINGSET_ACTIVATE_ANON);
+ if (refaults != target_lruvec->refaults[0] ||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
+ sc->may_deactivate |= DEACTIVATE_ANON;
+ else
+ sc->may_deactivate &= ~DEACTIVATE_ANON;
+
+ /*
+ * When refaults are being observed, it means a new
+ * workingset is being established. Deactivate to get
+ * rid of any stale active pages quickly.
+ */
+ refaults = lruvec_page_state(target_lruvec,
+ WORKINGSET_ACTIVATE_FILE);
+ if (refaults != target_lruvec->refaults[1] ||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
+ sc->may_deactivate |= DEACTIVATE_FILE;
+ else
+ sc->may_deactivate &= ~DEACTIVATE_FILE;
+ } else
+ sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
+
+ /*
+ * If we have plenty of inactive file pages that aren't
+ * thrashing, try to reclaim those first before touching
+ * anonymous pages.
+ */
+ file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
+ if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
+ sc->cache_trim_mode = 1;
+ else
+ sc->cache_trim_mode = 0;
+
+ /*
+ * Prevent the reclaimer from falling into the cache trap: as
+ * cache pages start out inactive, every cache fault will tip
+ * the scan balance towards the file LRU. And as the file LRU
+ * shrinks, so does the window for rotation from references.
+ * This means we have a runaway feedback loop where a tiny
+ * thrashing file LRU becomes infinitely more attractive than
+ * anon pages. Try to detect this based on file LRU size.
+ */
+ if (!cgroup_reclaim(sc)) {
+ unsigned long total_high_wmark = 0;
+ unsigned long free, anon;
+ int z;
+
+ free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
+ file = node_page_state(pgdat, NR_ACTIVE_FILE) +
+ node_page_state(pgdat, NR_INACTIVE_FILE);
+
+ for (z = 0; z < MAX_NR_ZONES; z++) {
+ struct zone *zone = &pgdat->node_zones[z];
+
+ if (!managed_zone(zone))
+ continue;
+
+ total_high_wmark += high_wmark_pages(zone);
+ }
+
+ /*
+ * Consider anon: if that's low too, this isn't a
+ * runaway file reclaim problem, but rather just
+ * extreme pressure. Reclaim as per usual then.
+ */
+ anon = node_page_state(pgdat, NR_INACTIVE_ANON);
+
+ sc->file_is_tiny =
+ file + free <= total_high_wmark &&
+ !(sc->may_deactivate & DEACTIVATE_ANON) &&
+ anon >> sc->priority;
+ }
+}
+
/*
* Determine how aggressively the anon and file LRU lists should be
* scanned.
@@ -3197,109 +3300,16 @@ static void shrink_node(pg_data_t *pgdat
unsigned long nr_reclaimed, nr_scanned;
struct lruvec *target_lruvec;
bool reclaimable = false;
- unsigned long file;
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
again:
- /*
- * Flush the memory cgroup stats, so that we read accurate per-memcg
- * lruvec stats for heuristics.
- */
- mem_cgroup_flush_stats();
-
memset(&sc->nr, 0, sizeof(sc->nr));
nr_reclaimed = sc->nr_reclaimed;
nr_scanned = sc->nr_scanned;
- /*
- * Determine the scan balance between anon and file LRUs.
- */
- spin_lock_irq(&target_lruvec->lru_lock);
- sc->anon_cost = target_lruvec->anon_cost;
- sc->file_cost = target_lruvec->file_cost;
- spin_unlock_irq(&target_lruvec->lru_lock);
-
- /*
- * Target desirable inactive:active list ratios for the anon
- * and file LRU lists.
- */
- if (!sc->force_deactivate) {
- unsigned long refaults;
-
- refaults = lruvec_page_state(target_lruvec,
- WORKINGSET_ACTIVATE_ANON);
- if (refaults != target_lruvec->refaults[0] ||
- inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
- sc->may_deactivate |= DEACTIVATE_ANON;
- else
- sc->may_deactivate &= ~DEACTIVATE_ANON;
-
- /*
- * When refaults are being observed, it means a new
- * workingset is being established. Deactivate to get
- * rid of any stale active pages quickly.
- */
- refaults = lruvec_page_state(target_lruvec,
- WORKINGSET_ACTIVATE_FILE);
- if (refaults != target_lruvec->refaults[1] ||
- inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
- sc->may_deactivate |= DEACTIVATE_FILE;
- else
- sc->may_deactivate &= ~DEACTIVATE_FILE;
- } else
- sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
-
- /*
- * If we have plenty of inactive file pages that aren't
- * thrashing, try to reclaim those first before touching
- * anonymous pages.
- */
- file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
- if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
- sc->cache_trim_mode = 1;
- else
- sc->cache_trim_mode = 0;
-
- /*
- * Prevent the reclaimer from falling into the cache trap: as
- * cache pages start out inactive, every cache fault will tip
- * the scan balance towards the file LRU. And as the file LRU
- * shrinks, so does the window for rotation from references.
- * This means we have a runaway feedback loop where a tiny
- * thrashing file LRU becomes infinitely more attractive than
- * anon pages. Try to detect this based on file LRU size.
- */
- if (!cgroup_reclaim(sc)) {
- unsigned long total_high_wmark = 0;
- unsigned long free, anon;
- int z;
-
- free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
- file = node_page_state(pgdat, NR_ACTIVE_FILE) +
- node_page_state(pgdat, NR_INACTIVE_FILE);
-
- for (z = 0; z < MAX_NR_ZONES; z++) {
- struct zone *zone = &pgdat->node_zones[z];
- if (!managed_zone(zone))
- continue;
-
- total_high_wmark += high_wmark_pages(zone);
- }
-
- /*
- * Consider anon: if that's low too, this isn't a
- * runaway file reclaim problem, but rather just
- * extreme pressure. Reclaim as per usual then.
- */
- anon = node_page_state(pgdat, NR_INACTIVE_ANON);
-
- sc->file_is_tiny =
- file + free <= total_high_wmark &&
- !(sc->may_deactivate & DEACTIVATE_ANON) &&
- anon >> sc->priority;
- }
+ prepare_scan_count(pgdat, sc);
shrink_node_memcgs(pgdat, sc);

View File

@ -0,0 +1,59 @@
From bc14d2c7c6d0fb8c79ad0fc5eab488b977cbcccf Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Sun, 6 Mar 2022 20:22:40 -0700
Subject: [PATCH 04/14] Revert "include/linux/mm_inline.h: fold
__update_lru_size() into its sole caller"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This patch undoes the following refactor:
commit 289ccba18af4 ("include/linux/mm_inline.h: fold __update_lru_size() into its sole caller")
The upcoming changes to include/linux/mm_inline.h will reuse
__update_lru_size().
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Change-Id: I6155c407d50199a43b179c7f45904d4b7c052118
---
include/linux/mm_inline.h | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -34,7 +34,7 @@ static inline int page_is_file_lru(struc
return folio_is_file_lru(page_folio(page));
}
-static __always_inline void update_lru_size(struct lruvec *lruvec,
+static __always_inline void __update_lru_size(struct lruvec *lruvec,
enum lru_list lru, enum zone_type zid,
long nr_pages)
{
@@ -43,6 +43,13 @@ static __always_inline void update_lru_s
__mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
__mod_zone_page_state(&pgdat->node_zones[zid],
NR_ZONE_LRU_BASE + lru, nr_pages);
+}
+
+static __always_inline void update_lru_size(struct lruvec *lruvec,
+ enum lru_list lru, enum zone_type zid,
+ long nr_pages)
+{
+ __update_lru_size(lruvec, lru, zid, nr_pages);
#ifdef CONFIG_MEMCG
mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
#endif

View File

@ -0,0 +1,777 @@
From 8c6beb4548c216da9dae5e1a7612a108396e3f9e Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Mon, 25 Jan 2021 21:12:33 -0700
Subject: [PATCH 05/14] mm: multi-gen LRU: groundwork
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Evictable pages are divided into multiple generations for each lruvec.
The youngest generation number is stored in lrugen->max_seq for both
anon and file types as they are aged on an equal footing. The oldest
generation numbers are stored in lrugen->min_seq[] separately for anon
and file types as clean file pages can be evicted regardless of swap
constraints. These three variables are monotonically increasing.
Generation numbers are truncated into order_base_2(MAX_NR_GENS+1) bits
in order to fit into the gen counter in folio->flags. Each truncated
generation number is an index to lrugen->lists[]. The sliding window
technique is used to track at least MIN_NR_GENS and at most
MAX_NR_GENS generations. The gen counter stores a value within [1,
MAX_NR_GENS] while a page is on one of lrugen->lists[]. Otherwise it
stores 0.
There are two conceptually independent procedures: "the aging", which
produces young generations, and "the eviction", which consumes old
generations. They form a closed-loop system, i.e., "the page reclaim".
Both procedures can be invoked from userspace for the purposes of
working set estimation and proactive reclaim. These techniques are
commonly used to optimize job scheduling (bin packing) in data
centers [1][2].
To avoid confusion, the terms "hot" and "cold" will be applied to the
multi-gen LRU, as a new convention; the terms "active" and "inactive"
will be applied to the active/inactive LRU, as usual.
The protection of hot pages and the selection of cold pages are based
on page access channels and patterns. There are two access channels:
one through page tables and the other through file descriptors. The
protection of the former channel is by design stronger because:
1. The uncertainty in determining the access patterns of the former
channel is higher due to the approximation of the accessed bit.
2. The cost of evicting the former channel is higher due to the TLB
flushes required and the likelihood of encountering the dirty bit.
3. The penalty of underprotecting the former channel is higher because
applications usually do not prepare themselves for major page
faults like they do for blocked I/O. E.g., GUI applications
commonly use dedicated I/O threads to avoid blocking rendering
threads.
There are also two access patterns: one with temporal locality and the
other without. For the reasons listed above, the former channel is
assumed to follow the former pattern unless VM_SEQ_READ or
VM_RAND_READ is present; the latter channel is assumed to follow the
latter pattern unless outlying refaults have been observed [3][4].
The next patch will address the "outlying refaults". Three macros,
i.e., LRU_REFS_WIDTH, LRU_REFS_PGOFF and LRU_REFS_MASK, used later are
added in this patch to make the entire patchset less diffy.
A page is added to the youngest generation on faulting. The aging
needs to check the accessed bit at least twice before handing this
page over to the eviction. The first check takes care of the accessed
bit set on the initial fault; the second check makes sure this page
has not been used since then. This protocol, AKA second chance,
requires a minimum of two generations, hence MIN_NR_GENS.
[1] https://dl.acm.org/doi/10.1145/3297858.3304053
[2] https://dl.acm.org/doi/10.1145/3503222.3507731
[3] https://lwn.net/Articles/495543/
[4] https://lwn.net/Articles/815342/
Signed-off-by: Yu Zhao <yuzhao@google.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Change-Id: I71de7cd15b8dfa6f9fdd838023474693c4fee0a7
---
fs/fuse/dev.c | 3 +-
include/linux/mm_inline.h | 175 ++++++++++++++++++++++++++++++
include/linux/mmzone.h | 102 +++++++++++++++++
include/linux/page-flags-layout.h | 13 ++-
include/linux/page-flags.h | 4 +-
include/linux/sched.h | 4 +
kernel/bounds.c | 5 +
mm/Kconfig | 8 ++
mm/huge_memory.c | 3 +-
mm/memcontrol.c | 2 +
mm/memory.c | 25 +++++
mm/mm_init.c | 6 +-
mm/mmzone.c | 2 +
mm/swap.c | 11 +-
mm/vmscan.c | 75 +++++++++++++
15 files changed, 424 insertions(+), 14 deletions(-)
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -776,7 +776,8 @@ static int fuse_check_page(struct page *
1 << PG_active |
1 << PG_workingset |
1 << PG_reclaim |
- 1 << PG_waiters))) {
+ 1 << PG_waiters |
+ LRU_GEN_MASK | LRU_REFS_MASK))) {
dump_page(page, "fuse: trying to steal weird page");
return 1;
}
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -40,6 +40,9 @@ static __always_inline void __update_lru
{
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+ lockdep_assert_held(&lruvec->lru_lock);
+ WARN_ON_ONCE(nr_pages != (int)nr_pages);
+
__mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
__mod_zone_page_state(&pgdat->node_zones[zid],
NR_ZONE_LRU_BASE + lru, nr_pages);
@@ -101,11 +104,177 @@ static __always_inline enum lru_list fol
return lru;
}
+#ifdef CONFIG_LRU_GEN
+
+static inline bool lru_gen_enabled(void)
+{
+ return true;
+}
+
+static inline bool lru_gen_in_fault(void)
+{
+ return current->in_lru_fault;
+}
+
+static inline int lru_gen_from_seq(unsigned long seq)
+{
+ return seq % MAX_NR_GENS;
+}
+
+static inline int folio_lru_gen(struct folio *folio)
+{
+ unsigned long flags = READ_ONCE(folio->flags);
+
+ return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+}
+
+static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
+{
+ unsigned long max_seq = lruvec->lrugen.max_seq;
+
+ VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
+
+ /* see the comment on MIN_NR_GENS */
+ return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1);
+}
+
+static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *folio,
+ int old_gen, int new_gen)
+{
+ int type = folio_is_file_lru(folio);
+ int zone = folio_zonenum(folio);
+ int delta = folio_nr_pages(folio);
+ enum lru_list lru = type * LRU_INACTIVE_FILE;
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
+
+ VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS);
+ VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS);
+ VM_WARN_ON_ONCE(old_gen == -1 && new_gen == -1);
+
+ if (old_gen >= 0)
+ WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone],
+ lrugen->nr_pages[old_gen][type][zone] - delta);
+ if (new_gen >= 0)
+ WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone],
+ lrugen->nr_pages[new_gen][type][zone] + delta);
+
+ /* addition */
+ if (old_gen < 0) {
+ if (lru_gen_is_active(lruvec, new_gen))
+ lru += LRU_ACTIVE;
+ __update_lru_size(lruvec, lru, zone, delta);
+ return;
+ }
+
+ /* deletion */
+ if (new_gen < 0) {
+ if (lru_gen_is_active(lruvec, old_gen))
+ lru += LRU_ACTIVE;
+ __update_lru_size(lruvec, lru, zone, -delta);
+ return;
+ }
+}
+
+static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+ unsigned long seq;
+ unsigned long flags;
+ int gen = folio_lru_gen(folio);
+ int type = folio_is_file_lru(folio);
+ int zone = folio_zonenum(folio);
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
+
+ VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
+
+ if (folio_test_unevictable(folio))
+ return false;
+ /*
+ * There are three common cases for this page:
+ * 1. If it's hot, e.g., freshly faulted in or previously hot and
+ * migrated, add it to the youngest generation.
+ * 2. If it's cold but can't be evicted immediately, i.e., an anon page
+ * not in swapcache or a dirty page pending writeback, add it to the
+ * second oldest generation.
+ * 3. Everything else (clean, cold) is added to the oldest generation.
+ */
+ if (folio_test_active(folio))
+ seq = lrugen->max_seq;
+ else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) ||
+ (folio_test_reclaim(folio) &&
+ (folio_test_dirty(folio) || folio_test_writeback(folio))))
+ seq = lrugen->min_seq[type] + 1;
+ else
+ seq = lrugen->min_seq[type];
+
+ gen = lru_gen_from_seq(seq);
+ flags = (gen + 1UL) << LRU_GEN_PGOFF;
+ /* see the comment on MIN_NR_GENS about PG_active */
+ set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags);
+
+ lru_gen_update_size(lruvec, folio, -1, gen);
+ /* for folio_rotate_reclaimable() */
+ if (reclaiming)
+ list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]);
+ else
+ list_add(&folio->lru, &lrugen->lists[gen][type][zone]);
+
+ return true;
+}
+
+static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+ unsigned long flags;
+ int gen = folio_lru_gen(folio);
+
+ if (gen < 0)
+ return false;
+
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
+
+ /* for folio_migrate_flags() */
+ flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? BIT(PG_active) : 0;
+ flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags);
+ gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+
+ lru_gen_update_size(lruvec, folio, gen, -1);
+ list_del(&folio->lru);
+
+ return true;
+}
+
+#else /* !CONFIG_LRU_GEN */
+
+static inline bool lru_gen_enabled(void)
+{
+ return false;
+}
+
+static inline bool lru_gen_in_fault(void)
+{
+ return false;
+}
+
+static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+ return false;
+}
+
+static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+ return false;
+}
+
+#endif /* CONFIG_LRU_GEN */
+
static __always_inline
void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
{
enum lru_list lru = folio_lru_list(folio);
+ if (lru_gen_add_folio(lruvec, folio, false))
+ return;
+
update_lru_size(lruvec, lru, folio_zonenum(folio),
folio_nr_pages(folio));
if (lru != LRU_UNEVICTABLE)
@@ -123,6 +292,9 @@ void lruvec_add_folio_tail(struct lruvec
{
enum lru_list lru = folio_lru_list(folio);
+ if (lru_gen_add_folio(lruvec, folio, true))
+ return;
+
update_lru_size(lruvec, lru, folio_zonenum(folio),
folio_nr_pages(folio));
/* This is not expected to be used on LRU_UNEVICTABLE */
@@ -140,6 +312,9 @@ void lruvec_del_folio(struct lruvec *lru
{
enum lru_list lru = folio_lru_list(folio);
+ if (lru_gen_del_folio(lruvec, folio, false))
+ return;
+
if (lru != LRU_UNEVICTABLE)
list_del(&folio->lru);
update_lru_size(lruvec, lru, folio_zonenum(folio),
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -314,6 +314,102 @@ enum lruvec_flags {
*/
};
+#endif /* !__GENERATING_BOUNDS_H */
+
+/*
+ * Evictable pages are divided into multiple generations. The youngest and the
+ * oldest generation numbers, max_seq and min_seq, are monotonically increasing.
+ * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An
+ * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the
+ * corresponding generation. The gen counter in folio->flags stores gen+1 while
+ * a page is on one of lrugen->lists[]. Otherwise it stores 0.
+ *
+ * A page is added to the youngest generation on faulting. The aging needs to
+ * check the accessed bit at least twice before handing this page over to the
+ * eviction. The first check takes care of the accessed bit set on the initial
+ * fault; the second check makes sure this page hasn't been used since then.
+ * This process, AKA second chance, requires a minimum of two generations,
+ * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive
+ * LRU, e.g., /proc/vmstat, these two generations are considered active; the
+ * rest of generations, if they exist, are considered inactive. See
+ * lru_gen_is_active().
+ *
+ * PG_active is always cleared while a page is on one of lrugen->lists[] so that
+ * the aging needs not to worry about it. And it's set again when a page
+ * considered active is isolated for non-reclaiming purposes, e.g., migration.
+ * See lru_gen_add_folio() and lru_gen_del_folio().
+ *
+ * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the
+ * number of categories of the active/inactive LRU when keeping track of
+ * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits
+ * in folio->flags.
+ */
+#define MIN_NR_GENS 2U
+#define MAX_NR_GENS 4U
+
+#ifndef __GENERATING_BOUNDS_H
+
+struct lruvec;
+
+#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
+#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
+
+#ifdef CONFIG_LRU_GEN
+
+enum {
+ LRU_GEN_ANON,
+ LRU_GEN_FILE,
+};
+
+/*
+ * The youngest generation number is stored in max_seq for both anon and file
+ * types as they are aged on an equal footing. The oldest generation numbers are
+ * stored in min_seq[] separately for anon and file types as clean file pages
+ * can be evicted regardless of swap constraints.
+ *
+ * Normally anon and file min_seq are in sync. But if swapping is constrained,
+ * e.g., out of swap space, file min_seq is allowed to advance and leave anon
+ * min_seq behind.
+ *
+ * The number of pages in each generation is eventually consistent and therefore
+ * can be transiently negative.
+ */
+struct lru_gen_struct {
+ /* the aging increments the youngest generation number */
+ unsigned long max_seq;
+ /* the eviction increments the oldest generation numbers */
+ unsigned long min_seq[ANON_AND_FILE];
+ /* the multi-gen LRU lists, lazily sorted on eviction */
+ struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+ /* the multi-gen LRU sizes, eventually consistent */
+ long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+};
+
+void lru_gen_init_lruvec(struct lruvec *lruvec);
+
+#ifdef CONFIG_MEMCG
+void lru_gen_init_memcg(struct mem_cgroup *memcg);
+void lru_gen_exit_memcg(struct mem_cgroup *memcg);
+#endif
+
+#else /* !CONFIG_LRU_GEN */
+
+static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
+{
+}
+
+#ifdef CONFIG_MEMCG
+static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
+{
+}
+
+static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
+{
+}
+#endif
+
+#endif /* CONFIG_LRU_GEN */
+
struct lruvec {
struct list_head lists[NR_LRU_LISTS];
/* per lruvec lru_lock for memcg */
@@ -331,6 +427,10 @@ struct lruvec {
unsigned long refaults[ANON_AND_FILE];
/* Various lruvec state flags (enum lruvec_flags) */
unsigned long flags;
+#ifdef CONFIG_LRU_GEN
+ /* evictable pages divided into generations */
+ struct lru_gen_struct lrugen;
+#endif
#ifdef CONFIG_MEMCG
struct pglist_data *pgdat;
#endif
@@ -746,6 +846,8 @@ static inline bool zone_is_empty(struct
#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH)
#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH)
#define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
+#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH)
+#define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH)
/*
* Define the bit shifts to access each section. For non-existent
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -55,7 +55,8 @@
#define SECTIONS_WIDTH 0
#endif
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \
+ <= BITS_PER_LONG - NR_PAGEFLAGS
#define NODES_WIDTH NODES_SHIFT
#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
#error "Vmemmap: No space for nodes field in page flags"
@@ -89,8 +90,8 @@
#define LAST_CPUPID_SHIFT 0
#endif
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \
- <= BITS_PER_LONG - NR_PAGEFLAGS
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
+ KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
#define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
#else
#define LAST_CPUPID_WIDTH 0
@@ -100,10 +101,12 @@
#define LAST_CPUPID_NOT_IN_PAGE_FLAGS
#endif
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \
- > BITS_PER_LONG - NR_PAGEFLAGS
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
+ KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
#error "Not enough bits in page flags"
#endif
+#define LRU_REFS_WIDTH 0
+
#endif
#endif /* _LINUX_PAGE_FLAGS_LAYOUT */
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -1058,7 +1058,7 @@ static __always_inline void __ClearPageA
1UL << PG_private | 1UL << PG_private_2 | \
1UL << PG_writeback | 1UL << PG_reserved | \
1UL << PG_slab | 1UL << PG_active | \
- 1UL << PG_unevictable | __PG_MLOCKED)
+ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK)
/*
* Flags checked when a page is prepped for return by the page allocator.
@@ -1069,7 +1069,7 @@ static __always_inline void __ClearPageA
* alloc-free cycle to prevent from reusing the page.
*/
#define PAGE_FLAGS_CHECK_AT_PREP \
- (PAGEFLAGS_MASK & ~__PG_HWPOISON)
+ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK)
#define PAGE_FLAGS_PRIVATE \
(1UL << PG_private | 1UL << PG_private_2)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -914,6 +914,10 @@ struct task_struct {
#ifdef CONFIG_MEMCG
unsigned in_user_fault:1;
#endif
+#ifdef CONFIG_LRU_GEN
+ /* whether the LRU algorithm may apply to this access */
+ unsigned in_lru_fault:1;
+#endif
#ifdef CONFIG_COMPAT_BRK
unsigned brk_randomized:1;
#endif
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -22,6 +22,11 @@ int main(void)
DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
#endif
DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
+#ifdef CONFIG_LRU_GEN
+ DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1));
+#else
+ DEFINE(LRU_GEN_WIDTH, 0);
+#endif
/* End of constants */
return 0;
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1124,6 +1124,14 @@ config PTE_MARKER_UFFD_WP
purposes. It is required to enable userfaultfd write protection on
file-backed memory types like shmem and hugetlbfs.
+config LRU_GEN
+ bool "Multi-Gen LRU"
+ depends on MMU
+ # make sure folio->flags has enough spare bits
+ depends on 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP
+ help
+ A high performance LRU implementation to overcommit memory.
+
source "mm/damon/Kconfig"
endmenu
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2438,7 +2438,8 @@ static void __split_huge_page_tail(struc
#ifdef CONFIG_64BIT
(1L << PG_arch_2) |
#endif
- (1L << PG_dirty)));
+ (1L << PG_dirty) |
+ LRU_GEN_MASK | LRU_REFS_MASK));
/* ->mapping in first tail page is compound_mapcount */
VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5170,6 +5170,7 @@ static void __mem_cgroup_free(struct mem
static void mem_cgroup_free(struct mem_cgroup *memcg)
{
+ lru_gen_exit_memcg(memcg);
memcg_wb_domain_exit(memcg);
__mem_cgroup_free(memcg);
}
@@ -5228,6 +5229,7 @@ static struct mem_cgroup *mem_cgroup_all
memcg->deferred_split_queue.split_queue_len = 0;
#endif
idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+ lru_gen_init_memcg(memcg);
return memcg;
fail:
mem_cgroup_id_remove(memcg);
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5110,6 +5110,27 @@ static inline void mm_account_fault(stru
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
}
+#ifdef CONFIG_LRU_GEN
+static void lru_gen_enter_fault(struct vm_area_struct *vma)
+{
+ /* the LRU algorithm doesn't apply to sequential or random reads */
+ current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
+}
+
+static void lru_gen_exit_fault(void)
+{
+ current->in_lru_fault = false;
+}
+#else
+static void lru_gen_enter_fault(struct vm_area_struct *vma)
+{
+}
+
+static void lru_gen_exit_fault(void)
+{
+}
+#endif /* CONFIG_LRU_GEN */
+
/*
* By the time we get here, we already hold the mm semaphore
*
@@ -5141,11 +5162,15 @@ vm_fault_t handle_mm_fault(struct vm_are
if (flags & FAULT_FLAG_USER)
mem_cgroup_enter_user_fault();
+ lru_gen_enter_fault(vma);
+
if (unlikely(is_vm_hugetlb_page(vma)))
ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
else
ret = __handle_mm_fault(vma, address, flags);
+ lru_gen_exit_fault();
+
if (flags & FAULT_FLAG_USER) {
mem_cgroup_exit_user_fault();
/*
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -65,14 +65,16 @@ void __init mminit_verify_pageflags_layo
shift = 8 * sizeof(unsigned long);
width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH
- - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH;
+ - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH;
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
- "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n",
+ "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n",
SECTIONS_WIDTH,
NODES_WIDTH,
ZONES_WIDTH,
LAST_CPUPID_WIDTH,
KASAN_TAG_WIDTH,
+ LRU_GEN_WIDTH,
+ LRU_REFS_WIDTH,
NR_PAGEFLAGS);
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
"Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n",
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -88,6 +88,8 @@ void lruvec_init(struct lruvec *lruvec)
* Poison its list head, so that any operations on it would crash.
*/
list_del(&lruvec->lists[LRU_UNEVICTABLE]);
+
+ lru_gen_init_lruvec(lruvec);
}
#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -484,6 +484,11 @@ void folio_add_lru(struct folio *folio)
folio_test_unevictable(folio), folio);
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
+ /* see the comment in lru_gen_add_folio() */
+ if (lru_gen_enabled() && !folio_test_unevictable(folio) &&
+ lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
+ folio_set_active(folio);
+
folio_get(folio);
local_lock(&cpu_fbatches.lock);
fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);
@@ -575,7 +580,7 @@ static void lru_deactivate_file_fn(struc
static void lru_deactivate_fn(struct lruvec *lruvec, struct folio *folio)
{
- if (folio_test_active(folio) && !folio_test_unevictable(folio)) {
+ if (!folio_test_unevictable(folio) && (folio_test_active(folio) || lru_gen_enabled())) {
long nr_pages = folio_nr_pages(folio);
lruvec_del_folio(lruvec, folio);
@@ -688,8 +693,8 @@ void deactivate_page(struct page *page)
{
struct folio *folio = page_folio(page);
- if (folio_test_lru(folio) && folio_test_active(folio) &&
- !folio_test_unevictable(folio)) {
+ if (folio_test_lru(folio) && !folio_test_unevictable(folio) &&
+ (folio_test_active(folio) || lru_gen_enabled())) {
struct folio_batch *fbatch;
folio_get(folio);
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3050,6 +3050,81 @@ static bool can_age_anon_pages(struct pg
return can_demote(pgdat->node_id, sc);
}
+#ifdef CONFIG_LRU_GEN
+
+/******************************************************************************
+ * shorthand helpers
+ ******************************************************************************/
+
+#define for_each_gen_type_zone(gen, type, zone) \
+ for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \
+ for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \
+ for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
+
+static struct lruvec __maybe_unused *get_lruvec(struct mem_cgroup *memcg, int nid)
+{
+ struct pglist_data *pgdat = NODE_DATA(nid);
+
+#ifdef CONFIG_MEMCG
+ if (memcg) {
+ struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
+
+ /* for hotadd_new_pgdat() */
+ if (!lruvec->pgdat)
+ lruvec->pgdat = pgdat;
+
+ return lruvec;
+ }
+#endif
+ VM_WARN_ON_ONCE(!mem_cgroup_disabled());
+
+ return pgdat ? &pgdat->__lruvec : NULL;
+}
+
+/******************************************************************************
+ * initialization
+ ******************************************************************************/
+
+void lru_gen_init_lruvec(struct lruvec *lruvec)
+{
+ int gen, type, zone;
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
+
+ lrugen->max_seq = MIN_NR_GENS + 1;
+
+ for_each_gen_type_zone(gen, type, zone)
+ INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
+}
+
+#ifdef CONFIG_MEMCG
+void lru_gen_init_memcg(struct mem_cgroup *memcg)
+{
+}
+
+void lru_gen_exit_memcg(struct mem_cgroup *memcg)
+{
+ int nid;
+
+ for_each_node(nid) {
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
+
+ VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
+ sizeof(lruvec->lrugen.nr_pages)));
+ }
+}
+#endif
+
+static int __init init_lru_gen(void)
+{
+ BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
+ BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
+
+ return 0;
+};
+late_initcall(init_lru_gen);
+
+#endif /* CONFIG_LRU_GEN */
+
static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
{
unsigned long nr[NR_LRU_LISTS];

View File

@ -0,0 +1,476 @@
From 93fa87bdef9e7fa9977355c4712c000f31639231 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Thu, 27 Jan 2022 20:43:22 -0700
Subject: [PATCH 07/14] mm: multi-gen LRU: exploit locality in rmap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Searching the rmap for PTEs mapping each page on an LRU list (to test
and clear the accessed bit) can be expensive because pages from
different VMAs (PA space) are not cache friendly to the rmap (VA
space). For workloads mostly using mapped pages, searching the rmap
can incur the highest CPU cost in the reclaim path.
This patch exploits spatial locality to reduce the trips into the
rmap. When shrink_page_list() walks the rmap and finds a young PTE, a
new function lru_gen_look_around() scans at most BITS_PER_LONG-1
adjacent PTEs. On finding another young PTE, it clears the accessed
bit and updates the gen counter of the page mapped by this PTE to
(max_seq%MAX_NR_GENS)+1.
Server benchmark results:
Single workload:
fio (buffered I/O): no change
Single workload:
memcached (anon): +[3, 5]%
Ops/sec KB/sec
patch1-6: 1106168.46 43025.04
patch1-7: 1147696.57 44640.29
Configurations:
no change
Client benchmark results:
kswapd profiles:
patch1-6
39.03% lzo1x_1_do_compress (real work)
18.47% page_vma_mapped_walk (overhead)
6.74% _raw_spin_unlock_irq
3.97% do_raw_spin_lock
2.49% ptep_clear_flush
2.48% anon_vma_interval_tree_iter_first
1.92% folio_referenced_one
1.88% __zram_bvec_write
1.48% memmove
1.31% vma_interval_tree_iter_next
patch1-7
48.16% lzo1x_1_do_compress (real work)
8.20% page_vma_mapped_walk (overhead)
7.06% _raw_spin_unlock_irq
2.92% ptep_clear_flush
2.53% __zram_bvec_write
2.11% do_raw_spin_lock
2.02% memmove
1.93% lru_gen_look_around
1.56% free_unref_page_list
1.40% memset
Configurations:
no change
Signed-off-by: Yu Zhao <yuzhao@google.com>
Acked-by: Barry Song <baohua@kernel.org>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Change-Id: I4b9ca0fd20f566ce554e703f14cee3fe0048c2fd
---
include/linux/memcontrol.h | 31 +++++++
include/linux/mm.h | 5 +
include/linux/mmzone.h | 6 ++
mm/internal.h | 1 +
mm/memcontrol.c | 1 +
mm/rmap.c | 6 ++
mm/swap.c | 4 +-
mm/vmscan.c | 184 +++++++++++++++++++++++++++++++++++++
8 files changed, 236 insertions(+), 2 deletions(-)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -444,6 +444,7 @@ static inline struct obj_cgroup *__folio
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
+ * - mem_cgroup_trylock_pages()
*
* For a kmem folio a caller should hold an rcu read lock to protect memcg
* associated with a kmem folio from being released.
@@ -505,6 +506,7 @@ static inline struct mem_cgroup *folio_m
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
+ * - mem_cgroup_trylock_pages()
*
* For a kmem page a caller should hold an rcu read lock to protect memcg
* associated with a kmem page from being released.
@@ -959,6 +961,23 @@ void unlock_page_memcg(struct page *page
void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val);
+/* try to stablize folio_memcg() for all the pages in a memcg */
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
+{
+ rcu_read_lock();
+
+ if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account))
+ return true;
+
+ rcu_read_unlock();
+ return false;
+}
+
+static inline void mem_cgroup_unlock_pages(void)
+{
+ rcu_read_unlock();
+}
+
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void mod_memcg_state(struct mem_cgroup *memcg,
int idx, int val)
@@ -1433,6 +1452,18 @@ static inline void folio_memcg_unlock(st
{
}
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
+{
+ /* to match folio_memcg_rcu() */
+ rcu_read_lock();
+ return true;
+}
+
+static inline void mem_cgroup_unlock_pages(void)
+{
+ rcu_read_unlock();
+}
+
static inline void mem_cgroup_handle_over_high(void)
{
}
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1465,6 +1465,11 @@ static inline unsigned long folio_pfn(st
return page_to_pfn(&folio->page);
}
+static inline struct folio *pfn_folio(unsigned long pfn)
+{
+ return page_folio(pfn_to_page(pfn));
+}
+
static inline atomic_t *folio_pincount_ptr(struct folio *folio)
{
return &folio_page(folio, 1)->compound_pincount;
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -372,6 +372,7 @@ enum lruvec_flags {
#ifndef __GENERATING_BOUNDS_H
struct lruvec;
+struct page_vma_mapped_walk;
#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
@@ -427,6 +428,7 @@ struct lru_gen_struct {
};
void lru_gen_init_lruvec(struct lruvec *lruvec);
+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
#ifdef CONFIG_MEMCG
void lru_gen_init_memcg(struct mem_cgroup *memcg);
@@ -439,6 +441,10 @@ static inline void lru_gen_init_lruvec(s
{
}
+static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+{
+}
+
#ifdef CONFIG_MEMCG
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
{
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -83,6 +83,7 @@ vm_fault_t do_swap_page(struct vm_fault
void folio_rotate_reclaimable(struct folio *folio);
bool __folio_end_writeback(struct folio *folio);
void deactivate_file_folio(struct folio *folio);
+void folio_activate(struct folio *folio);
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long floor, unsigned long ceiling);
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2789,6 +2789,7 @@ static void commit_charge(struct folio *
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
+ * - mem_cgroup_trylock_pages()
*/
folio->memcg_data = (unsigned long)memcg;
}
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -833,6 +833,12 @@ static bool folio_referenced_one(struct
}
if (pvmw.pte) {
+ if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
+ !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
+ lru_gen_look_around(&pvmw);
+ referenced++;
+ }
+
if (ptep_clear_flush_young_notify(vma, address,
pvmw.pte)) {
/*
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -366,7 +366,7 @@ static void folio_activate_drain(int cpu
folio_batch_move_lru(fbatch, folio_activate_fn);
}
-static void folio_activate(struct folio *folio)
+void folio_activate(struct folio *folio)
{
if (folio_test_lru(folio) && !folio_test_active(folio) &&
!folio_test_unevictable(folio)) {
@@ -385,7 +385,7 @@ static inline void folio_activate_drain(
{
}
-static void folio_activate(struct folio *folio)
+void folio_activate(struct folio *folio)
{
struct lruvec *lruvec;
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1635,6 +1635,11 @@ retry:
if (!sc->may_unmap && folio_mapped(folio))
goto keep_locked;
+ /* folio_update_gen() tried to promote this page? */
+ if (lru_gen_enabled() && !ignore_references &&
+ folio_mapped(folio) && folio_test_referenced(folio))
+ goto keep_locked;
+
/*
* The number of dirty pages determines if a node is marked
* reclaim_congested. kswapd will stall and start writing
@@ -3219,6 +3224,29 @@ static bool positive_ctrl_err(struct ctr
* the aging
******************************************************************************/
+/* promote pages accessed through page tables */
+static int folio_update_gen(struct folio *folio, int gen)
+{
+ unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
+
+ VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
+ VM_WARN_ON_ONCE(!rcu_read_lock_held());
+
+ do {
+ /* lru_gen_del_folio() has isolated this page? */
+ if (!(old_flags & LRU_GEN_MASK)) {
+ /* for shrink_page_list() */
+ new_flags = old_flags | BIT(PG_referenced);
+ continue;
+ }
+
+ new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
+ new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
+ } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags));
+
+ return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+}
+
/* protect pages accessed multiple times through file descriptors */
static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
{
@@ -3230,6 +3258,11 @@ static int folio_inc_gen(struct lruvec *
VM_WARN_ON_ONCE_FOLIO(!(old_flags & LRU_GEN_MASK), folio);
do {
+ new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+ /* folio_update_gen() has promoted this page? */
+ if (new_gen >= 0 && new_gen != old_gen)
+ return new_gen;
+
new_gen = (old_gen + 1) % MAX_NR_GENS;
new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
@@ -3244,6 +3277,43 @@ static int folio_inc_gen(struct lruvec *
return new_gen;
}
+static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
+{
+ unsigned long pfn = pte_pfn(pte);
+
+ VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end);
+
+ if (!pte_present(pte) || is_zero_pfn(pfn))
+ return -1;
+
+ if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
+ return -1;
+
+ if (WARN_ON_ONCE(!pfn_valid(pfn)))
+ return -1;
+
+ return pfn;
+}
+
+static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
+ struct pglist_data *pgdat)
+{
+ struct folio *folio;
+
+ /* try to avoid unnecessary memory loads */
+ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
+ return NULL;
+
+ folio = pfn_folio(pfn);
+ if (folio_nid(folio) != pgdat->node_id)
+ return NULL;
+
+ if (folio_memcg_rcu(folio) != memcg)
+ return NULL;
+
+ return folio;
+}
+
static void inc_min_seq(struct lruvec *lruvec, int type)
{
struct lru_gen_struct *lrugen = &lruvec->lrugen;
@@ -3443,6 +3513,114 @@ static void lru_gen_age_node(struct pgli
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
}
+/*
+ * This function exploits spatial locality when shrink_page_list() walks the
+ * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages.
+ */
+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+{
+ int i;
+ pte_t *pte;
+ unsigned long start;
+ unsigned long end;
+ unsigned long addr;
+ unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
+ struct folio *folio = pfn_folio(pvmw->pfn);
+ struct mem_cgroup *memcg = folio_memcg(folio);
+ struct pglist_data *pgdat = folio_pgdat(folio);
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ DEFINE_MAX_SEQ(lruvec);
+ int old_gen, new_gen = lru_gen_from_seq(max_seq);
+
+ lockdep_assert_held(pvmw->ptl);
+ VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio);
+
+ if (spin_is_contended(pvmw->ptl))
+ return;
+
+ start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start);
+ end = min(pvmw->address | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1;
+
+ if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
+ if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
+ end = start + MIN_LRU_BATCH * PAGE_SIZE;
+ else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2)
+ start = end - MIN_LRU_BATCH * PAGE_SIZE;
+ else {
+ start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2;
+ end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2;
+ }
+ }
+
+ pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE;
+
+ rcu_read_lock();
+ arch_enter_lazy_mmu_mode();
+
+ for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
+ unsigned long pfn;
+
+ pfn = get_pte_pfn(pte[i], pvmw->vma, addr);
+ if (pfn == -1)
+ continue;
+
+ if (!pte_young(pte[i]))
+ continue;
+
+ folio = get_pfn_folio(pfn, memcg, pgdat);
+ if (!folio)
+ continue;
+
+ if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i))
+ VM_WARN_ON_ONCE(true);
+
+ if (pte_dirty(pte[i]) && !folio_test_dirty(folio) &&
+ !(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
+ !folio_test_swapcache(folio)))
+ folio_mark_dirty(folio);
+
+ old_gen = folio_lru_gen(folio);
+ if (old_gen < 0)
+ folio_set_referenced(folio);
+ else if (old_gen != new_gen)
+ __set_bit(i, bitmap);
+ }
+
+ arch_leave_lazy_mmu_mode();
+ rcu_read_unlock();
+
+ if (bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) {
+ for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
+ folio = pfn_folio(pte_pfn(pte[i]));
+ folio_activate(folio);
+ }
+ return;
+ }
+
+ /* folio_update_gen() requires stable folio_memcg() */
+ if (!mem_cgroup_trylock_pages(memcg))
+ return;
+
+ spin_lock_irq(&lruvec->lru_lock);
+ new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq);
+
+ for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
+ folio = pfn_folio(pte_pfn(pte[i]));
+ if (folio_memcg_rcu(folio) != memcg)
+ continue;
+
+ old_gen = folio_update_gen(folio, new_gen);
+ if (old_gen < 0 || old_gen == new_gen)
+ continue;
+
+ lru_gen_update_size(lruvec, folio, old_gen, new_gen);
+ }
+
+ spin_unlock_irq(&lruvec->lru_lock);
+
+ mem_cgroup_unlock_pages();
+}
+
/******************************************************************************
* the eviction
******************************************************************************/
@@ -3479,6 +3657,12 @@ static bool sort_folio(struct lruvec *lr
return true;
}
+ /* promoted */
+ if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
+ list_move(&folio->lru, &lrugen->lists[gen][type][zone]);
+ return true;
+ }
+
/* protected */
if (tier > tier_idx) {
int hist = lru_hist_from_seq(lrugen->min_seq[type]);

View File

@ -0,0 +1,290 @@
From 6b9670b94ba2b49b289b997121062500e32fc3e4 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Thu, 27 Jan 2022 19:59:54 -0700
Subject: [PATCH 09/14] mm: multi-gen LRU: optimize multiple memcgs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When multiple memcgs are available, it is possible to use generations
as a frame of reference to make better choices and improve overall
performance under global memory pressure. This patch adds a basic
optimization to select memcgs that can drop single-use unmapped clean
pages first. Doing so reduces the chance of going into the aging path
or swapping, which can be costly.
A typical example that benefits from this optimization is a server
running mixed types of workloads, e.g., heavy anon workload in one
memcg and heavy buffered I/O workload in the other.
Though this optimization can be applied to both kswapd and direct
reclaim, it is only added to kswapd to keep the patchset manageable.
Later improvements may cover the direct reclaim path.
While ensuring certain fairness to all eligible memcgs, proportional
scans of individual memcgs also require proper backoff to avoid
overshooting their aggregate reclaim target by too much. Otherwise it
can cause high direct reclaim latency. The conditions for backoff are:
1. At low priorities, for direct reclaim, if aging fairness or direct
reclaim latency is at risk, i.e., aging one memcg multiple times or
swapping after the target is met.
2. At high priorities, for global reclaim, if per-zone free pages are
above respective watermarks.
Server benchmark results:
Mixed workloads:
fio (buffered I/O): +[19, 21]%
IOPS BW
patch1-8: 1880k 7343MiB/s
patch1-9: 2252k 8796MiB/s
memcached (anon): +[119, 123]%
Ops/sec KB/sec
patch1-8: 862768.65 33514.68
patch1-9: 1911022.12 74234.54
Mixed workloads:
fio (buffered I/O): +[75, 77]%
IOPS BW
5.19-rc1: 1279k 4996MiB/s
patch1-9: 2252k 8796MiB/s
memcached (anon): +[13, 15]%
Ops/sec KB/sec
5.19-rc1: 1673524.04 65008.87
patch1-9: 1911022.12 74234.54
Configurations:
(changes since patch 6)
cat mixed.sh
modprobe brd rd_nr=2 rd_size=56623104
swapoff -a
mkswap /dev/ram0
swapon /dev/ram0
mkfs.ext4 /dev/ram1
mount -t ext4 /dev/ram1 /mnt
memtier_benchmark -S /var/run/memcached/memcached.sock \
-P memcache_binary -n allkeys --key-minimum=1 \
--key-maximum=50000000 --key-pattern=P:P -c 1 -t 36 \
--ratio 1:0 --pipeline 8 -d 2000
fio -name=mglru --numjobs=36 --directory=/mnt --size=1408m \
--buffered=1 --ioengine=io_uring --iodepth=128 \
--iodepth_batch_submit=32 --iodepth_batch_complete=32 \
--rw=randread --random_distribution=random --norandommap \
--time_based --ramp_time=10m --runtime=90m --group_reporting &
pid=$!
sleep 200
memtier_benchmark -S /var/run/memcached/memcached.sock \
-P memcache_binary -n allkeys --key-minimum=1 \
--key-maximum=50000000 --key-pattern=R:R -c 1 -t 36 \
--ratio 0:1 --pipeline 8 --randomize --distinct-client-seed
kill -INT $pid
wait
Client benchmark results:
no change (CONFIG_MEMCG=n)
Signed-off-by: Yu Zhao <yuzhao@google.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Change-Id: I7e00e0c733437e534ac98031cf8154a681becc00
---
mm/vmscan.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 95 insertions(+), 9 deletions(-)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -131,6 +131,12 @@ struct scan_control {
/* Always discard instead of demoting to lower tier memory */
unsigned int no_demotion:1;
+#ifdef CONFIG_LRU_GEN
+ /* help kswapd make better choices among multiple memcgs */
+ unsigned int memcgs_need_aging:1;
+ unsigned long last_reclaimed;
+#endif
+
/* Allocation order */
s8 order;
@@ -4429,6 +4435,19 @@ static void lru_gen_age_node(struct pgli
VM_WARN_ON_ONCE(!current_is_kswapd());
+ sc->last_reclaimed = sc->nr_reclaimed;
+
+ /*
+ * To reduce the chance of going into the aging path, which can be
+ * costly, optimistically skip it if the flag below was cleared in the
+ * eviction path. This improves the overall performance when multiple
+ * memcgs are available.
+ */
+ if (!sc->memcgs_need_aging) {
+ sc->memcgs_need_aging = true;
+ return;
+ }
+
set_mm_walk(pgdat);
memcg = mem_cgroup_iter(NULL, NULL, NULL);
@@ -4840,7 +4859,8 @@ static int isolate_folios(struct lruvec
return scanned;
}
-static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
+static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
+ bool *need_swapping)
{
int type;
int scanned;
@@ -4903,6 +4923,9 @@ static int evict_folios(struct lruvec *l
sc->nr_reclaimed += reclaimed;
+ if (need_swapping && type == LRU_GEN_ANON)
+ *need_swapping = true;
+
return scanned;
}
@@ -4912,9 +4935,8 @@ static int evict_folios(struct lruvec *l
* reclaim.
*/
static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
- bool can_swap)
+ bool can_swap, bool *need_aging)
{
- bool need_aging;
unsigned long nr_to_scan;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MAX_SEQ(lruvec);
@@ -4924,8 +4946,8 @@ static unsigned long get_nr_to_scan(stru
(mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
return 0;
- need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
- if (!need_aging)
+ *need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
+ if (!*need_aging)
return nr_to_scan;
/* skip the aging path at the default priority */
@@ -4942,10 +4964,67 @@ done:
return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
}
+static bool should_abort_scan(struct lruvec *lruvec, unsigned long seq,
+ struct scan_control *sc, bool need_swapping)
+{
+ int i;
+ DEFINE_MAX_SEQ(lruvec);
+
+ if (!current_is_kswapd()) {
+ /* age each memcg at most once to ensure fairness */
+ if (max_seq - seq > 1)
+ return true;
+
+ /* over-swapping can increase allocation latency */
+ if (sc->nr_reclaimed >= sc->nr_to_reclaim && need_swapping)
+ return true;
+
+ /* give this thread a chance to exit and free its memory */
+ if (fatal_signal_pending(current)) {
+ sc->nr_reclaimed += MIN_LRU_BATCH;
+ return true;
+ }
+
+ if (cgroup_reclaim(sc))
+ return false;
+ } else if (sc->nr_reclaimed - sc->last_reclaimed < sc->nr_to_reclaim)
+ return false;
+
+ /* keep scanning at low priorities to ensure fairness */
+ if (sc->priority > DEF_PRIORITY - 2)
+ return false;
+
+ /*
+ * A minimum amount of work was done under global memory pressure. For
+ * kswapd, it may be overshooting. For direct reclaim, the allocation
+ * may succeed if all suitable zones are somewhat safe. In either case,
+ * it's better to stop now, and restart later if necessary.
+ */
+ for (i = 0; i <= sc->reclaim_idx; i++) {
+ unsigned long wmark;
+ struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i;
+
+ if (!managed_zone(zone))
+ continue;
+
+ wmark = current_is_kswapd() ? high_wmark_pages(zone) : low_wmark_pages(zone);
+ if (wmark > zone_page_state(zone, NR_FREE_PAGES))
+ return false;
+ }
+
+ sc->nr_reclaimed += MIN_LRU_BATCH;
+
+ return true;
+}
+
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
{
struct blk_plug plug;
+ bool need_aging = false;
+ bool need_swapping = false;
unsigned long scanned = 0;
+ unsigned long reclaimed = sc->nr_reclaimed;
+ DEFINE_MAX_SEQ(lruvec);
lru_add_drain();
@@ -4965,21 +5044,28 @@ static void lru_gen_shrink_lruvec(struct
else
swappiness = 0;
- nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
+ nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging);
if (!nr_to_scan)
- break;
+ goto done;
- delta = evict_folios(lruvec, sc, swappiness);
+ delta = evict_folios(lruvec, sc, swappiness, &need_swapping);
if (!delta)
- break;
+ goto done;
scanned += delta;
if (scanned >= nr_to_scan)
break;
+ if (should_abort_scan(lruvec, max_seq, sc, need_swapping))
+ break;
+
cond_resched();
}
+ /* see the comment in lru_gen_age_node() */
+ if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging)
+ sc->memcgs_need_aging = false;
+done:
clear_mm_walk();
blk_finish_plug(&plug);

View File

@ -0,0 +1,475 @@
From ef61bb3622ee0f36e055dfd5006badff08f5ce61 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Thu, 27 Jan 2022 19:52:09 -0700
Subject: [PATCH 10/14] mm: multi-gen LRU: kill switch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add /sys/kernel/mm/lru_gen/enabled as a kill switch. Components that
can be disabled include:
0x0001: the multi-gen LRU core
0x0002: walking page table, when arch_has_hw_pte_young() returns
true
0x0004: clearing the accessed bit in non-leaf PMD entries, when
CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y
[yYnN]: apply to all the components above
E.g.,
echo y >/sys/kernel/mm/lru_gen/enabled
cat /sys/kernel/mm/lru_gen/enabled
0x0007
echo 5 >/sys/kernel/mm/lru_gen/enabled
cat /sys/kernel/mm/lru_gen/enabled
0x0005
NB: the page table walks happen on the scale of seconds under heavy
memory pressure, in which case the mmap_lock contention is a lesser
concern, compared with the LRU lock contention and the I/O congestion.
So far the only well-known case of the mmap_lock contention happens on
Android, due to Scudo [1] which allocates several thousand VMAs for
merely a few hundred MBs. The SPF and the Maple Tree also have
provided their own assessments [2][3]. However, if walking page tables
does worsen the mmap_lock contention, the kill switch can be used to
disable it. In this case the multi-gen LRU will suffer a minor
performance degradation, as shown previously.
Clearing the accessed bit in non-leaf PMD entries can also be
disabled, since this behavior was not tested on x86 varieties other
than Intel and AMD.
[1] https://source.android.com/devices/tech/debug/scudo
[2] https://lore.kernel.org/r/20220128131006.67712-1-michel@lespinasse.org/
[3] https://lore.kernel.org/r/20220426150616.3937571-1-Liam.Howlett@oracle.com/
Signed-off-by: Yu Zhao <yuzhao@google.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Change-Id: I4c909618e8fed7fb1337f6624bbe542ec920a515
---
include/linux/cgroup.h | 15 ++-
include/linux/mm_inline.h | 15 ++-
include/linux/mmzone.h | 9 ++
kernel/cgroup/cgroup-internal.h | 1 -
mm/Kconfig | 6 +
mm/vmscan.c | 228 +++++++++++++++++++++++++++++++-
6 files changed, 265 insertions(+), 9 deletions(-)
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgr
css_put(&cgrp->self);
}
+extern struct mutex cgroup_mutex;
+
+static inline void cgroup_lock(void)
+{
+ mutex_lock(&cgroup_mutex);
+}
+
+static inline void cgroup_unlock(void)
+{
+ mutex_unlock(&cgroup_mutex);
+}
+
/**
* task_css_set_check - obtain a task's css_set with extra access conditions
* @task: the task to obtain css_set for
@@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgr
* as locks used during the cgroup_subsys::attach() methods.
*/
#ifdef CONFIG_PROVE_RCU
-extern struct mutex cgroup_mutex;
extern spinlock_t css_set_lock;
#define task_css_set_check(task, __c) \
rcu_dereference_check((task)->cgroups, \
@@ -708,6 +719,8 @@ struct cgroup;
static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
static inline void css_get(struct cgroup_subsys_state *css) {}
static inline void css_put(struct cgroup_subsys_state *css) {}
+static inline void cgroup_lock(void) {}
+static inline void cgroup_unlock(void) {}
static inline int cgroup_attach_task_all(struct task_struct *from,
struct task_struct *t) { return 0; }
static inline int cgroupstats_build(struct cgroupstats *stats,
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -106,10 +106,21 @@ static __always_inline enum lru_list fol
#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_GEN_ENABLED
static inline bool lru_gen_enabled(void)
{
- return true;
+ DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]);
+
+ return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]);
+}
+#else
+static inline bool lru_gen_enabled(void)
+{
+ DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]);
+
+ return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]);
}
+#endif
static inline bool lru_gen_in_fault(void)
{
@@ -222,7 +233,7 @@ static inline bool lru_gen_add_folio(str
VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
- if (folio_test_unevictable(folio))
+ if (folio_test_unevictable(folio) || !lrugen->enabled)
return false;
/*
* There are three common cases for this page:
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -384,6 +384,13 @@ enum {
LRU_GEN_FILE,
};
+enum {
+ LRU_GEN_CORE,
+ LRU_GEN_MM_WALK,
+ LRU_GEN_NONLEAF_YOUNG,
+ NR_LRU_GEN_CAPS
+};
+
#define MIN_LRU_BATCH BITS_PER_LONG
#define MAX_LRU_BATCH (MIN_LRU_BATCH * 64)
@@ -425,6 +432,8 @@ struct lru_gen_struct {
/* can be modified without holding the LRU lock */
atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
+ /* whether the multi-gen LRU is enabled */
+ bool enabled;
};
enum {
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -164,7 +164,6 @@ struct cgroup_mgctx {
#define DEFINE_CGROUP_MGCTX(name) \
struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)
-extern struct mutex cgroup_mutex;
extern spinlock_t css_set_lock;
extern struct cgroup_subsys *cgroup_subsys[];
extern struct list_head cgroup_roots;
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1133,6 +1133,12 @@ config LRU_GEN
help
A high performance LRU implementation to overcommit memory.
+config LRU_GEN_ENABLED
+ bool "Enable by default"
+ depends on LRU_GEN
+ help
+ This option enables the multi-gen LRU by default.
+
config LRU_GEN_STATS
bool "Full stats for debugging"
depends on LRU_GEN
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -51,6 +51,7 @@
#include <linux/psi.h>
#include <linux/pagewalk.h>
#include <linux/shmem_fs.h>
+#include <linux/ctype.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -3070,6 +3071,14 @@ static bool can_age_anon_pages(struct pg
#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_GEN_ENABLED
+DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS);
+#define get_cap(cap) static_branch_likely(&lru_gen_caps[cap])
+#else
+DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
+#define get_cap(cap) static_branch_unlikely(&lru_gen_caps[cap])
+#endif
+
/******************************************************************************
* shorthand helpers
******************************************************************************/
@@ -3946,7 +3955,8 @@ static void walk_pmd_range_locked(pud_t
goto next;
if (!pmd_trans_huge(pmd[i])) {
- if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG))
+ if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
+ get_cap(LRU_GEN_NONLEAF_YOUNG))
pmdp_test_and_clear_young(vma, addr, pmd + i);
goto next;
}
@@ -4044,10 +4054,12 @@ restart:
walk->mm_stats[MM_NONLEAF_TOTAL]++;
#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
- if (!pmd_young(val))
- continue;
+ if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
+ if (!pmd_young(val))
+ continue;
- walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
+ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
+ }
#endif
if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
continue;
@@ -4309,7 +4321,7 @@ static bool try_to_inc_max_seq(struct lr
* handful of PTEs. Spreading the work out over a period of time usually
* is less efficient, but it avoids bursty page faults.
*/
- if (!arch_has_hw_pte_young()) {
+ if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
success = iterate_mm_list_nowalk(lruvec, max_seq);
goto done;
}
@@ -5072,6 +5084,208 @@ done:
}
/******************************************************************************
+ * state change
+ ******************************************************************************/
+
+static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
+{
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
+
+ if (lrugen->enabled) {
+ enum lru_list lru;
+
+ for_each_evictable_lru(lru) {
+ if (!list_empty(&lruvec->lists[lru]))
+ return false;
+ }
+ } else {
+ int gen, type, zone;
+
+ for_each_gen_type_zone(gen, type, zone) {
+ if (!list_empty(&lrugen->lists[gen][type][zone]))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool fill_evictable(struct lruvec *lruvec)
+{
+ enum lru_list lru;
+ int remaining = MAX_LRU_BATCH;
+
+ for_each_evictable_lru(lru) {
+ int type = is_file_lru(lru);
+ bool active = is_active_lru(lru);
+ struct list_head *head = &lruvec->lists[lru];
+
+ while (!list_empty(head)) {
+ bool success;
+ struct folio *folio = lru_to_folio(head);
+
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio) != active, folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_lru_gen(folio) != -1, folio);
+
+ lruvec_del_folio(lruvec, folio);
+ success = lru_gen_add_folio(lruvec, folio, false);
+ VM_WARN_ON_ONCE(!success);
+
+ if (!--remaining)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool drain_evictable(struct lruvec *lruvec)
+{
+ int gen, type, zone;
+ int remaining = MAX_LRU_BATCH;
+
+ for_each_gen_type_zone(gen, type, zone) {
+ struct list_head *head = &lruvec->lrugen.lists[gen][type][zone];
+
+ while (!list_empty(head)) {
+ bool success;
+ struct folio *folio = lru_to_folio(head);
+
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
+
+ success = lru_gen_del_folio(lruvec, folio, false);
+ VM_WARN_ON_ONCE(!success);
+ lruvec_add_folio(lruvec, folio);
+
+ if (!--remaining)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static void lru_gen_change_state(bool enabled)
+{
+ static DEFINE_MUTEX(state_mutex);
+
+ struct mem_cgroup *memcg;
+
+ cgroup_lock();
+ cpus_read_lock();
+ get_online_mems();
+ mutex_lock(&state_mutex);
+
+ if (enabled == lru_gen_enabled())
+ goto unlock;
+
+ if (enabled)
+ static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
+ else
+ static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
+
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ do {
+ int nid;
+
+ for_each_node(nid) {
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
+
+ if (!lruvec)
+ continue;
+
+ spin_lock_irq(&lruvec->lru_lock);
+
+ VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
+ VM_WARN_ON_ONCE(!state_is_valid(lruvec));
+
+ lruvec->lrugen.enabled = enabled;
+
+ while (!(enabled ? fill_evictable(lruvec) : drain_evictable(lruvec))) {
+ spin_unlock_irq(&lruvec->lru_lock);
+ cond_resched();
+ spin_lock_irq(&lruvec->lru_lock);
+ }
+
+ spin_unlock_irq(&lruvec->lru_lock);
+ }
+
+ cond_resched();
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+unlock:
+ mutex_unlock(&state_mutex);
+ put_online_mems();
+ cpus_read_unlock();
+ cgroup_unlock();
+}
+
+/******************************************************************************
+ * sysfs interface
+ ******************************************************************************/
+
+static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int caps = 0;
+
+ if (get_cap(LRU_GEN_CORE))
+ caps |= BIT(LRU_GEN_CORE);
+
+ if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
+ caps |= BIT(LRU_GEN_MM_WALK);
+
+ if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG))
+ caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
+
+ return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
+}
+
+static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ int i;
+ unsigned int caps;
+
+ if (tolower(*buf) == 'n')
+ caps = 0;
+ else if (tolower(*buf) == 'y')
+ caps = -1;
+ else if (kstrtouint(buf, 0, &caps))
+ return -EINVAL;
+
+ for (i = 0; i < NR_LRU_GEN_CAPS; i++) {
+ bool enabled = caps & BIT(i);
+
+ if (i == LRU_GEN_CORE)
+ lru_gen_change_state(enabled);
+ else if (enabled)
+ static_branch_enable(&lru_gen_caps[i]);
+ else
+ static_branch_disable(&lru_gen_caps[i]);
+ }
+
+ return len;
+}
+
+static struct kobj_attribute lru_gen_enabled_attr = __ATTR(
+ enabled, 0644, show_enabled, store_enabled
+);
+
+static struct attribute *lru_gen_attrs[] = {
+ &lru_gen_enabled_attr.attr,
+ NULL
+};
+
+static struct attribute_group lru_gen_attr_group = {
+ .name = "lru_gen",
+ .attrs = lru_gen_attrs,
+};
+
+/******************************************************************************
* initialization
******************************************************************************/
@@ -5081,6 +5295,7 @@ void lru_gen_init_lruvec(struct lruvec *
struct lru_gen_struct *lrugen = &lruvec->lrugen;
lrugen->max_seq = MIN_NR_GENS + 1;
+ lrugen->enabled = lru_gen_enabled();
for_each_gen_type_zone(gen, type, zone)
INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
@@ -5120,6 +5335,9 @@ static int __init init_lru_gen(void)
BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
+ if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
+ pr_err("lru_gen: failed to create sysfs group\n");
+
return 0;
};
late_initcall(init_lru_gen);

View File

@ -0,0 +1,202 @@
From 9d92c76fb8ac09ff195024139575d8c4db66b672 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Thu, 27 Jan 2022 20:08:50 -0700
Subject: [PATCH 11/14] mm: multi-gen LRU: thrashing prevention
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add /sys/kernel/mm/lru_gen/min_ttl_ms for thrashing prevention, as
requested by many desktop users [1].
When set to value N, it prevents the working set of N milliseconds
from getting evicted. The OOM killer is triggered if this working set
cannot be kept in memory. Based on the average human detectable lag
(~100ms), N=1000 usually eliminates intolerable lags due to thrashing.
Larger values like N=3000 make lags less noticeable at the risk of
premature OOM kills.
Compared with the size-based approach [2], this time-based approach
has the following advantages:
1. It is easier to configure because it is agnostic to applications
and memory sizes.
2. It is more reliable because it is directly wired to the OOM killer.
[1] https://lore.kernel.org/r/Ydza%2FzXKY9ATRoh6@google.com/
[2] https://lore.kernel.org/r/20101028191523.GA14972@google.com/
Signed-off-by: Yu Zhao <yuzhao@google.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Change-Id: I007499d7e47374b59fd620e8c3962940bc9f788e
---
include/linux/mmzone.h | 2 ++
mm/vmscan.c | 74 ++++++++++++++++++++++++++++++++++++++++--
2 files changed, 73 insertions(+), 3 deletions(-)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -419,6 +419,8 @@ struct lru_gen_struct {
unsigned long max_seq;
/* the eviction increments the oldest generation numbers */
unsigned long min_seq[ANON_AND_FILE];
+ /* the birth time of each generation in jiffies */
+ unsigned long timestamps[MAX_NR_GENS];
/* the multi-gen LRU lists, lazily sorted on eviction */
struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
/* the multi-gen LRU sizes, eventually consistent */
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4293,6 +4293,7 @@ static void inc_max_seq(struct lruvec *l
for (type = 0; type < ANON_AND_FILE; type++)
reset_ctrl_pos(lruvec, type, false);
+ WRITE_ONCE(lrugen->timestamps[next], jiffies);
/* make sure preceding modifications appear */
smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
@@ -4420,7 +4421,7 @@ static bool should_run_aging(struct lruv
return false;
}
-static void age_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl)
{
bool need_aging;
unsigned long nr_to_scan;
@@ -4434,16 +4435,36 @@ static void age_lruvec(struct lruvec *lr
mem_cgroup_calculate_protection(NULL, memcg);
if (mem_cgroup_below_min(memcg))
- return;
+ return false;
need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
+
+ if (min_ttl) {
+ int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+ unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
+
+ if (time_is_after_jiffies(birth + min_ttl))
+ return false;
+
+ /* the size is likely too small to be helpful */
+ if (!nr_to_scan && sc->priority != DEF_PRIORITY)
+ return false;
+ }
+
if (need_aging)
try_to_inc_max_seq(lruvec, max_seq, sc, swappiness);
+
+ return true;
}
+/* to protect the working set of the last N jiffies */
+static unsigned long lru_gen_min_ttl __read_mostly;
+
static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
{
struct mem_cgroup *memcg;
+ bool success = false;
+ unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
VM_WARN_ON_ONCE(!current_is_kswapd());
@@ -4466,12 +4487,32 @@ static void lru_gen_age_node(struct pgli
do {
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
- age_lruvec(lruvec, sc);
+ if (age_lruvec(lruvec, sc, min_ttl))
+ success = true;
cond_resched();
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
clear_mm_walk();
+
+ /* check the order to exclude compaction-induced reclaim */
+ if (success || !min_ttl || sc->order)
+ return;
+
+ /*
+ * The main goal is to OOM kill if every generation from all memcgs is
+ * younger than min_ttl. However, another possibility is all memcgs are
+ * either below min or empty.
+ */
+ if (mutex_trylock(&oom_lock)) {
+ struct oom_control oc = {
+ .gfp_mask = sc->gfp_mask,
+ };
+
+ out_of_memory(&oc);
+
+ mutex_unlock(&oom_lock);
+ }
}
/*
@@ -5228,6 +5269,28 @@ unlock:
* sysfs interface
******************************************************************************/
+static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl)));
+}
+
+static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ unsigned int msecs;
+
+ if (kstrtouint(buf, 0, &msecs))
+ return -EINVAL;
+
+ WRITE_ONCE(lru_gen_min_ttl, msecs_to_jiffies(msecs));
+
+ return len;
+}
+
+static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR(
+ min_ttl_ms, 0644, show_min_ttl, store_min_ttl
+);
+
static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
unsigned int caps = 0;
@@ -5276,6 +5339,7 @@ static struct kobj_attribute lru_gen_ena
);
static struct attribute *lru_gen_attrs[] = {
+ &lru_gen_min_ttl_attr.attr,
&lru_gen_enabled_attr.attr,
NULL
};
@@ -5291,12 +5355,16 @@ static struct attribute_group lru_gen_at
void lru_gen_init_lruvec(struct lruvec *lruvec)
{
+ int i;
int gen, type, zone;
struct lru_gen_struct *lrugen = &lruvec->lrugen;
lrugen->max_seq = MIN_NR_GENS + 1;
lrugen->enabled = lru_gen_enabled();
+ for (i = 0; i <= MIN_NR_GENS + 1; i++)
+ lrugen->timestamps[i] = jiffies;
+
for_each_gen_type_zone(gen, type, zone)
INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);

View File

@ -0,0 +1,557 @@
From d1e0e5fcdea16d4ceead496a0ea2fdbb6bc5bfe4 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Thu, 27 Jan 2022 20:12:41 -0700
Subject: [PATCH 12/14] mm: multi-gen LRU: debugfs interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add /sys/kernel/debug/lru_gen for working set estimation and proactive
reclaim. These techniques are commonly used to optimize job scheduling
(bin packing) in data centers [1][2].
Compared with the page table-based approach and the PFN-based
approach, this lruvec-based approach has the following advantages:
1. It offers better choices because it is aware of memcgs, NUMA nodes,
shared mappings and unmapped page cache.
2. It is more scalable because it is O(nr_hot_pages), whereas the
PFN-based approach is O(nr_total_pages).
Add /sys/kernel/debug/lru_gen_full for debugging.
[1] https://dl.acm.org/doi/10.1145/3297858.3304053
[2] https://dl.acm.org/doi/10.1145/3503222.3507731
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Change-Id: I7bb06f14e0a94901a076cc3767d0855d4f1ea3ab
---
include/linux/nodemask.h | 1 +
mm/vmscan.c | 411 ++++++++++++++++++++++++++++++++++++++-
2 files changed, 402 insertions(+), 10 deletions(-)
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -493,6 +493,7 @@ static inline int num_node_state(enum no
#define first_online_node 0
#define first_memory_node 0
#define next_online_node(nid) (MAX_NUMNODES)
+#define next_memory_node(nid) (MAX_NUMNODES)
#define nr_node_ids 1U
#define nr_online_nodes 1U
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -52,6 +52,7 @@
#include <linux/pagewalk.h>
#include <linux/shmem_fs.h>
#include <linux/ctype.h>
+#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -4197,12 +4198,40 @@ static void clear_mm_walk(void)
kfree(walk);
}
-static void inc_min_seq(struct lruvec *lruvec, int type)
+static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
{
+ int zone;
+ int remaining = MAX_LRU_BATCH;
struct lru_gen_struct *lrugen = &lruvec->lrugen;
+ int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
+
+ if (type == LRU_GEN_ANON && !can_swap)
+ goto done;
+
+ /* prevent cold/hot inversion if force_scan is true */
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+ struct list_head *head = &lrugen->lists[old_gen][type][zone];
+
+ while (!list_empty(head)) {
+ struct folio *folio = lru_to_folio(head);
+
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
+ new_gen = folio_inc_gen(lruvec, folio, false);
+ list_move_tail(&folio->lru, &lrugen->lists[new_gen][type][zone]);
+
+ if (!--remaining)
+ return false;
+ }
+ }
+done:
reset_ctrl_pos(lruvec, type, true);
WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1);
+
+ return true;
}
static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
@@ -4248,7 +4277,7 @@ next:
return success;
}
-static void inc_max_seq(struct lruvec *lruvec, bool can_swap)
+static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
{
int prev, next;
int type, zone;
@@ -4262,9 +4291,13 @@ static void inc_max_seq(struct lruvec *l
if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
continue;
- VM_WARN_ON_ONCE(type == LRU_GEN_FILE || can_swap);
+ VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap));
- inc_min_seq(lruvec, type);
+ while (!inc_min_seq(lruvec, type, can_swap)) {
+ spin_unlock_irq(&lruvec->lru_lock);
+ cond_resched();
+ spin_lock_irq(&lruvec->lru_lock);
+ }
}
/*
@@ -4301,7 +4334,7 @@ static void inc_max_seq(struct lruvec *l
}
static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
- struct scan_control *sc, bool can_swap)
+ struct scan_control *sc, bool can_swap, bool force_scan)
{
bool success;
struct lru_gen_mm_walk *walk;
@@ -4322,7 +4355,7 @@ static bool try_to_inc_max_seq(struct lr
* handful of PTEs. Spreading the work out over a period of time usually
* is less efficient, but it avoids bursty page faults.
*/
- if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
+ if (!force_scan && !(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
success = iterate_mm_list_nowalk(lruvec, max_seq);
goto done;
}
@@ -4336,7 +4369,7 @@ static bool try_to_inc_max_seq(struct lr
walk->lruvec = lruvec;
walk->max_seq = max_seq;
walk->can_swap = can_swap;
- walk->force_scan = false;
+ walk->force_scan = force_scan;
do {
success = iterate_mm_list(lruvec, walk, &mm);
@@ -4356,7 +4389,7 @@ done:
VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
- inc_max_seq(lruvec, can_swap);
+ inc_max_seq(lruvec, can_swap, force_scan);
/* either this sees any waiters or they will see updated max_seq */
if (wq_has_sleeper(&lruvec->mm_state.wait))
wake_up_all(&lruvec->mm_state.wait);
@@ -4452,7 +4485,7 @@ static bool age_lruvec(struct lruvec *lr
}
if (need_aging)
- try_to_inc_max_seq(lruvec, max_seq, sc, swappiness);
+ try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
return true;
}
@@ -5011,7 +5044,7 @@ static unsigned long get_nr_to_scan(stru
if (current_is_kswapd())
return 0;
- if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap))
+ if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
return nr_to_scan;
done:
return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
@@ -5350,6 +5383,361 @@ static struct attribute_group lru_gen_at
};
/******************************************************************************
+ * debugfs interface
+ ******************************************************************************/
+
+static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos)
+{
+ struct mem_cgroup *memcg;
+ loff_t nr_to_skip = *pos;
+
+ m->private = kvmalloc(PATH_MAX, GFP_KERNEL);
+ if (!m->private)
+ return ERR_PTR(-ENOMEM);
+
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ do {
+ int nid;
+
+ for_each_node_state(nid, N_MEMORY) {
+ if (!nr_to_skip--)
+ return get_lruvec(memcg, nid);
+ }
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+
+ return NULL;
+}
+
+static void lru_gen_seq_stop(struct seq_file *m, void *v)
+{
+ if (!IS_ERR_OR_NULL(v))
+ mem_cgroup_iter_break(NULL, lruvec_memcg(v));
+
+ kvfree(m->private);
+ m->private = NULL;
+}
+
+static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ int nid = lruvec_pgdat(v)->node_id;
+ struct mem_cgroup *memcg = lruvec_memcg(v);
+
+ ++*pos;
+
+ nid = next_memory_node(nid);
+ if (nid == MAX_NUMNODES) {
+ memcg = mem_cgroup_iter(NULL, memcg, NULL);
+ if (!memcg)
+ return NULL;
+
+ nid = first_memory_node;
+ }
+
+ return get_lruvec(memcg, nid);
+}
+
+static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
+ unsigned long max_seq, unsigned long *min_seq,
+ unsigned long seq)
+{
+ int i;
+ int type, tier;
+ int hist = lru_hist_from_seq(seq);
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
+
+ for (tier = 0; tier < MAX_NR_TIERS; tier++) {
+ seq_printf(m, " %10d", tier);
+ for (type = 0; type < ANON_AND_FILE; type++) {
+ const char *s = " ";
+ unsigned long n[3] = {};
+
+ if (seq == max_seq) {
+ s = "RT ";
+ n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]);
+ n[1] = READ_ONCE(lrugen->avg_total[type][tier]);
+ } else if (seq == min_seq[type] || NR_HIST_GENS > 1) {
+ s = "rep";
+ n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]);
+ n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]);
+ if (tier)
+ n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]);
+ }
+
+ for (i = 0; i < 3; i++)
+ seq_printf(m, " %10lu%c", n[i], s[i]);
+ }
+ seq_putc(m, '\n');
+ }
+
+ seq_puts(m, " ");
+ for (i = 0; i < NR_MM_STATS; i++) {
+ const char *s = " ";
+ unsigned long n = 0;
+
+ if (seq == max_seq && NR_HIST_GENS == 1) {
+ s = "LOYNFA";
+ n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
+ } else if (seq != max_seq && NR_HIST_GENS > 1) {
+ s = "loynfa";
+ n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
+ }
+
+ seq_printf(m, " %10lu%c", n, s[i]);
+ }
+ seq_putc(m, '\n');
+}
+
+static int lru_gen_seq_show(struct seq_file *m, void *v)
+{
+ unsigned long seq;
+ bool full = !debugfs_real_fops(m->file)->write;
+ struct lruvec *lruvec = v;
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
+ int nid = lruvec_pgdat(lruvec)->node_id;
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ DEFINE_MAX_SEQ(lruvec);
+ DEFINE_MIN_SEQ(lruvec);
+
+ if (nid == first_memory_node) {
+ const char *path = memcg ? m->private : "";
+
+#ifdef CONFIG_MEMCG
+ if (memcg)
+ cgroup_path(memcg->css.cgroup, m->private, PATH_MAX);
+#endif
+ seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path);
+ }
+
+ seq_printf(m, " node %5d\n", nid);
+
+ if (!full)
+ seq = min_seq[LRU_GEN_ANON];
+ else if (max_seq >= MAX_NR_GENS)
+ seq = max_seq - MAX_NR_GENS + 1;
+ else
+ seq = 0;
+
+ for (; seq <= max_seq; seq++) {
+ int type, zone;
+ int gen = lru_gen_from_seq(seq);
+ unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
+
+ seq_printf(m, " %10lu %10u", seq, jiffies_to_msecs(jiffies - birth));
+
+ for (type = 0; type < ANON_AND_FILE; type++) {
+ unsigned long size = 0;
+ char mark = full && seq < min_seq[type] ? 'x' : ' ';
+
+ for (zone = 0; zone < MAX_NR_ZONES; zone++)
+ size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
+
+ seq_printf(m, " %10lu%c", size, mark);
+ }
+
+ seq_putc(m, '\n');
+
+ if (full)
+ lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq);
+ }
+
+ return 0;
+}
+
+static const struct seq_operations lru_gen_seq_ops = {
+ .start = lru_gen_seq_start,
+ .stop = lru_gen_seq_stop,
+ .next = lru_gen_seq_next,
+ .show = lru_gen_seq_show,
+};
+
+static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
+ bool can_swap, bool force_scan)
+{
+ DEFINE_MAX_SEQ(lruvec);
+ DEFINE_MIN_SEQ(lruvec);
+
+ if (seq < max_seq)
+ return 0;
+
+ if (seq > max_seq)
+ return -EINVAL;
+
+ if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq)
+ return -ERANGE;
+
+ try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, force_scan);
+
+ return 0;
+}
+
+static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
+ int swappiness, unsigned long nr_to_reclaim)
+{
+ DEFINE_MAX_SEQ(lruvec);
+
+ if (seq + MIN_NR_GENS > max_seq)
+ return -EINVAL;
+
+ sc->nr_reclaimed = 0;
+
+ while (!signal_pending(current)) {
+ DEFINE_MIN_SEQ(lruvec);
+
+ if (seq < min_seq[!swappiness])
+ return 0;
+
+ if (sc->nr_reclaimed >= nr_to_reclaim)
+ return 0;
+
+ if (!evict_folios(lruvec, sc, swappiness, NULL))
+ return 0;
+
+ cond_resched();
+ }
+
+ return -EINTR;
+}
+
+static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq,
+ struct scan_control *sc, int swappiness, unsigned long opt)
+{
+ struct lruvec *lruvec;
+ int err = -EINVAL;
+ struct mem_cgroup *memcg = NULL;
+
+ if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY))
+ return -EINVAL;
+
+ if (!mem_cgroup_disabled()) {
+ rcu_read_lock();
+ memcg = mem_cgroup_from_id(memcg_id);
+#ifdef CONFIG_MEMCG
+ if (memcg && !css_tryget(&memcg->css))
+ memcg = NULL;
+#endif
+ rcu_read_unlock();
+
+ if (!memcg)
+ return -EINVAL;
+ }
+
+ if (memcg_id != mem_cgroup_id(memcg))
+ goto done;
+
+ lruvec = get_lruvec(memcg, nid);
+
+ if (swappiness < 0)
+ swappiness = get_swappiness(lruvec, sc);
+ else if (swappiness > 200)
+ goto done;
+
+ switch (cmd) {
+ case '+':
+ err = run_aging(lruvec, seq, sc, swappiness, opt);
+ break;
+ case '-':
+ err = run_eviction(lruvec, seq, sc, swappiness, opt);
+ break;
+ }
+done:
+ mem_cgroup_put(memcg);
+
+ return err;
+}
+
+static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
+ size_t len, loff_t *pos)
+{
+ void *buf;
+ char *cur, *next;
+ unsigned int flags;
+ struct blk_plug plug;
+ int err = -EINVAL;
+ struct scan_control sc = {
+ .may_writepage = true,
+ .may_unmap = true,
+ .may_swap = true,
+ .reclaim_idx = MAX_NR_ZONES - 1,
+ .gfp_mask = GFP_KERNEL,
+ };
+
+ buf = kvmalloc(len + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, src, len)) {
+ kvfree(buf);
+ return -EFAULT;
+ }
+
+ set_task_reclaim_state(current, &sc.reclaim_state);
+ flags = memalloc_noreclaim_save();
+ blk_start_plug(&plug);
+ if (!set_mm_walk(NULL)) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ next = buf;
+ next[len] = '\0';
+
+ while ((cur = strsep(&next, ",;\n"))) {
+ int n;
+ int end;
+ char cmd;
+ unsigned int memcg_id;
+ unsigned int nid;
+ unsigned long seq;
+ unsigned int swappiness = -1;
+ unsigned long opt = -1;
+
+ cur = skip_spaces(cur);
+ if (!*cur)
+ continue;
+
+ n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid,
+ &seq, &end, &swappiness, &end, &opt, &end);
+ if (n < 4 || cur[end]) {
+ err = -EINVAL;
+ break;
+ }
+
+ err = run_cmd(cmd, memcg_id, nid, seq, &sc, swappiness, opt);
+ if (err)
+ break;
+ }
+done:
+ clear_mm_walk();
+ blk_finish_plug(&plug);
+ memalloc_noreclaim_restore(flags);
+ set_task_reclaim_state(current, NULL);
+
+ kvfree(buf);
+
+ return err ? : len;
+}
+
+static int lru_gen_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &lru_gen_seq_ops);
+}
+
+static const struct file_operations lru_gen_rw_fops = {
+ .open = lru_gen_seq_open,
+ .read = seq_read,
+ .write = lru_gen_seq_write,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static const struct file_operations lru_gen_ro_fops = {
+ .open = lru_gen_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/******************************************************************************
* initialization
******************************************************************************/
@@ -5406,6 +5794,9 @@ static int __init init_lru_gen(void)
if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
pr_err("lru_gen: failed to create sysfs group\n");
+ debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops);
+ debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops);
+
return 0;
};
late_initcall(init_lru_gen);

View File

@ -0,0 +1,253 @@
From 22199c9b30ffcc332be643577709a2af960e6786 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Sun, 23 Jan 2022 16:44:43 -0700
Subject: [PATCH 13/14] mm: multi-gen LRU: admin guide
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add an admin guide.
Signed-off-by: Yu Zhao <yuzhao@google.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Change-Id: I1902178bcbb5adfa0a748c4d284a6456059bdd7e
---
Documentation/admin-guide/mm/index.rst | 1 +
Documentation/admin-guide/mm/multigen_lru.rst | 162 ++++++++++++++++++
mm/Kconfig | 3 +-
mm/vmscan.c | 4 +
4 files changed, 169 insertions(+), 1 deletion(-)
create mode 100644 Documentation/admin-guide/mm/multigen_lru.rst
--- a/Documentation/admin-guide/mm/index.rst
+++ b/Documentation/admin-guide/mm/index.rst
@@ -32,6 +32,7 @@ the Linux memory management.
idle_page_tracking
ksm
memory-hotplug
+ multigen_lru
nommu-mmap
numa_memory_policy
numaperf
--- /dev/null
+++ b/Documentation/admin-guide/mm/multigen_lru.rst
@@ -0,0 +1,162 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Multi-Gen LRU
+=============
+The multi-gen LRU is an alternative LRU implementation that optimizes
+page reclaim and improves performance under memory pressure. Page
+reclaim decides the kernel's caching policy and ability to overcommit
+memory. It directly impacts the kswapd CPU usage and RAM efficiency.
+
+Quick start
+===========
+Build the kernel with the following configurations.
+
+* ``CONFIG_LRU_GEN=y``
+* ``CONFIG_LRU_GEN_ENABLED=y``
+
+All set!
+
+Runtime options
+===============
+``/sys/kernel/mm/lru_gen/`` contains stable ABIs described in the
+following subsections.
+
+Kill switch
+-----------
+``enabled`` accepts different values to enable or disable the
+following components. Its default value depends on
+``CONFIG_LRU_GEN_ENABLED``. All the components should be enabled
+unless some of them have unforeseen side effects. Writing to
+``enabled`` has no effect when a component is not supported by the
+hardware, and valid values will be accepted even when the main switch
+is off.
+
+====== ===============================================================
+Values Components
+====== ===============================================================
+0x0001 The main switch for the multi-gen LRU.
+0x0002 Clearing the accessed bit in leaf page table entries in large
+ batches, when MMU sets it (e.g., on x86). This behavior can
+ theoretically worsen lock contention (mmap_lock). If it is
+ disabled, the multi-gen LRU will suffer a minor performance
+ degradation for workloads that contiguously map hot pages,
+ whose accessed bits can be otherwise cleared by fewer larger
+ batches.
+0x0004 Clearing the accessed bit in non-leaf page table entries as
+ well, when MMU sets it (e.g., on x86). This behavior was not
+ verified on x86 varieties other than Intel and AMD. If it is
+ disabled, the multi-gen LRU will suffer a negligible
+ performance degradation.
+[yYnN] Apply to all the components above.
+====== ===============================================================
+
+E.g.,
+::
+
+ echo y >/sys/kernel/mm/lru_gen/enabled
+ cat /sys/kernel/mm/lru_gen/enabled
+ 0x0007
+ echo 5 >/sys/kernel/mm/lru_gen/enabled
+ cat /sys/kernel/mm/lru_gen/enabled
+ 0x0005
+
+Thrashing prevention
+--------------------
+Personal computers are more sensitive to thrashing because it can
+cause janks (lags when rendering UI) and negatively impact user
+experience. The multi-gen LRU offers thrashing prevention to the
+majority of laptop and desktop users who do not have ``oomd``.
+
+Users can write ``N`` to ``min_ttl_ms`` to prevent the working set of
+``N`` milliseconds from getting evicted. The OOM killer is triggered
+if this working set cannot be kept in memory. In other words, this
+option works as an adjustable pressure relief valve, and when open, it
+terminates applications that are hopefully not being used.
+
+Based on the average human detectable lag (~100ms), ``N=1000`` usually
+eliminates intolerable janks due to thrashing. Larger values like
+``N=3000`` make janks less noticeable at the risk of premature OOM
+kills.
+
+The default value ``0`` means disabled.
+
+Experimental features
+=====================
+``/sys/kernel/debug/lru_gen`` accepts commands described in the
+following subsections. Multiple command lines are supported, so does
+concatenation with delimiters ``,`` and ``;``.
+
+``/sys/kernel/debug/lru_gen_full`` provides additional stats for
+debugging. ``CONFIG_LRU_GEN_STATS=y`` keeps historical stats from
+evicted generations in this file.
+
+Working set estimation
+----------------------
+Working set estimation measures how much memory an application needs
+in a given time interval, and it is usually done with little impact on
+the performance of the application. E.g., data centers want to
+optimize job scheduling (bin packing) to improve memory utilizations.
+When a new job comes in, the job scheduler needs to find out whether
+each server it manages can allocate a certain amount of memory for
+this new job before it can pick a candidate. To do so, the job
+scheduler needs to estimate the working sets of the existing jobs.
+
+When it is read, ``lru_gen`` returns a histogram of numbers of pages
+accessed over different time intervals for each memcg and node.
+``MAX_NR_GENS`` decides the number of bins for each histogram. The
+histograms are noncumulative.
+::
+
+ memcg memcg_id memcg_path
+ node node_id
+ min_gen_nr age_in_ms nr_anon_pages nr_file_pages
+ ...
+ max_gen_nr age_in_ms nr_anon_pages nr_file_pages
+
+Each bin contains an estimated number of pages that have been accessed
+within ``age_in_ms``. E.g., ``min_gen_nr`` contains the coldest pages
+and ``max_gen_nr`` contains the hottest pages, since ``age_in_ms`` of
+the former is the largest and that of the latter is the smallest.
+
+Users can write the following command to ``lru_gen`` to create a new
+generation ``max_gen_nr+1``:
+
+ ``+ memcg_id node_id max_gen_nr [can_swap [force_scan]]``
+
+``can_swap`` defaults to the swap setting and, if it is set to ``1``,
+it forces the scan of anon pages when swap is off, and vice versa.
+``force_scan`` defaults to ``1`` and, if it is set to ``0``, it
+employs heuristics to reduce the overhead, which is likely to reduce
+the coverage as well.
+
+A typical use case is that a job scheduler runs this command at a
+certain time interval to create new generations, and it ranks the
+servers it manages based on the sizes of their cold pages defined by
+this time interval.
+
+Proactive reclaim
+-----------------
+Proactive reclaim induces page reclaim when there is no memory
+pressure. It usually targets cold pages only. E.g., when a new job
+comes in, the job scheduler wants to proactively reclaim cold pages on
+the server it selected, to improve the chance of successfully landing
+this new job.
+
+Users can write the following command to ``lru_gen`` to evict
+generations less than or equal to ``min_gen_nr``.
+
+ ``- memcg_id node_id min_gen_nr [swappiness [nr_to_reclaim]]``
+
+``min_gen_nr`` should be less than ``max_gen_nr-1``, since
+``max_gen_nr`` and ``max_gen_nr-1`` are not fully aged (equivalent to
+the active list) and therefore cannot be evicted. ``swappiness``
+overrides the default value in ``/proc/sys/vm/swappiness``.
+``nr_to_reclaim`` limits the number of pages to evict.
+
+A typical use case is that a job scheduler runs this command before it
+tries to land a new job on a server. If it fails to materialize enough
+cold pages because of the overestimation, it retries on the next
+server according to the ranking result obtained from the working set
+estimation step. This less forceful approach limits the impacts on the
+existing jobs.
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1131,7 +1131,8 @@ config LRU_GEN
# make sure folio->flags has enough spare bits
depends on 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP
help
- A high performance LRU implementation to overcommit memory.
+ A high performance LRU implementation to overcommit memory. See
+ Documentation/admin-guide/mm/multigen_lru.rst for details.
config LRU_GEN_ENABLED
bool "Enable by default"
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5307,6 +5307,7 @@ static ssize_t show_min_ttl(struct kobje
return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl)));
}
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t len)
{
@@ -5340,6 +5341,7 @@ static ssize_t show_enabled(struct kobje
return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
}
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t len)
{
@@ -5487,6 +5489,7 @@ static void lru_gen_seq_show_full(struct
seq_putc(m, '\n');
}
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
static int lru_gen_seq_show(struct seq_file *m, void *v)
{
unsigned long seq;
@@ -5645,6 +5648,7 @@ done:
return err;
}
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
size_t len, loff_t *pos)
{

View File

@ -0,0 +1,202 @@
From bd82a74f6b5c0a75ef61be5e9be34319bb17328f Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Sun, 6 Mar 2022 20:35:00 -0700
Subject: [PATCH 14/14] mm: multi-gen LRU: design doc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add a design doc.
Signed-off-by: Yu Zhao <yuzhao@google.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Change-Id: I958afcabf5abc37b3e58f72638d35a349c31b98d
---
Documentation/mm/index.rst | 1 +
Documentation/mm/multigen_lru.rst | 159 ++++++++++++++++++++++++++++++
2 files changed, 160 insertions(+)
create mode 100644 Documentation/mm/multigen_lru.rst
--- a/Documentation/mm/index.rst
+++ b/Documentation/mm/index.rst
@@ -51,6 +51,7 @@ above structured documentation, or delet
ksm
memory-model
mmu_notifier
+ multigen_lru
numa
overcommit-accounting
page_migration
--- /dev/null
+++ b/Documentation/mm/multigen_lru.rst
@@ -0,0 +1,159 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Multi-Gen LRU
+=============
+The multi-gen LRU is an alternative LRU implementation that optimizes
+page reclaim and improves performance under memory pressure. Page
+reclaim decides the kernel's caching policy and ability to overcommit
+memory. It directly impacts the kswapd CPU usage and RAM efficiency.
+
+Design overview
+===============
+Objectives
+----------
+The design objectives are:
+
+* Good representation of access recency
+* Try to profit from spatial locality
+* Fast paths to make obvious choices
+* Simple self-correcting heuristics
+
+The representation of access recency is at the core of all LRU
+implementations. In the multi-gen LRU, each generation represents a
+group of pages with similar access recency. Generations establish a
+(time-based) common frame of reference and therefore help make better
+choices, e.g., between different memcgs on a computer or different
+computers in a data center (for job scheduling).
+
+Exploiting spatial locality improves efficiency when gathering the
+accessed bit. A rmap walk targets a single page and does not try to
+profit from discovering a young PTE. A page table walk can sweep all
+the young PTEs in an address space, but the address space can be too
+sparse to make a profit. The key is to optimize both methods and use
+them in combination.
+
+Fast paths reduce code complexity and runtime overhead. Unmapped pages
+do not require TLB flushes; clean pages do not require writeback.
+These facts are only helpful when other conditions, e.g., access
+recency, are similar. With generations as a common frame of reference,
+additional factors stand out. But obvious choices might not be good
+choices; thus self-correction is necessary.
+
+The benefits of simple self-correcting heuristics are self-evident.
+Again, with generations as a common frame of reference, this becomes
+attainable. Specifically, pages in the same generation can be
+categorized based on additional factors, and a feedback loop can
+statistically compare the refault percentages across those categories
+and infer which of them are better choices.
+
+Assumptions
+-----------
+The protection of hot pages and the selection of cold pages are based
+on page access channels and patterns. There are two access channels:
+
+* Accesses through page tables
+* Accesses through file descriptors
+
+The protection of the former channel is by design stronger because:
+
+1. The uncertainty in determining the access patterns of the former
+ channel is higher due to the approximation of the accessed bit.
+2. The cost of evicting the former channel is higher due to the TLB
+ flushes required and the likelihood of encountering the dirty bit.
+3. The penalty of underprotecting the former channel is higher because
+ applications usually do not prepare themselves for major page
+ faults like they do for blocked I/O. E.g., GUI applications
+ commonly use dedicated I/O threads to avoid blocking rendering
+ threads.
+
+There are also two access patterns:
+
+* Accesses exhibiting temporal locality
+* Accesses not exhibiting temporal locality
+
+For the reasons listed above, the former channel is assumed to follow
+the former pattern unless ``VM_SEQ_READ`` or ``VM_RAND_READ`` is
+present, and the latter channel is assumed to follow the latter
+pattern unless outlying refaults have been observed.
+
+Workflow overview
+=================
+Evictable pages are divided into multiple generations for each
+``lruvec``. The youngest generation number is stored in
+``lrugen->max_seq`` for both anon and file types as they are aged on
+an equal footing. The oldest generation numbers are stored in
+``lrugen->min_seq[]`` separately for anon and file types as clean file
+pages can be evicted regardless of swap constraints. These three
+variables are monotonically increasing.
+
+Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)``
+bits in order to fit into the gen counter in ``folio->flags``. Each
+truncated generation number is an index to ``lrugen->lists[]``. The
+sliding window technique is used to track at least ``MIN_NR_GENS`` and
+at most ``MAX_NR_GENS`` generations. The gen counter stores a value
+within ``[1, MAX_NR_GENS]`` while a page is on one of
+``lrugen->lists[]``; otherwise it stores zero.
+
+Each generation is divided into multiple tiers. A page accessed ``N``
+times through file descriptors is in tier ``order_base_2(N)``. Unlike
+generations, tiers do not have dedicated ``lrugen->lists[]``. In
+contrast to moving across generations, which requires the LRU lock,
+moving across tiers only involves atomic operations on
+``folio->flags`` and therefore has a negligible cost. A feedback loop
+modeled after the PID controller monitors refaults over all the tiers
+from anon and file types and decides which tiers from which types to
+evict or protect.
+
+There are two conceptually independent procedures: the aging and the
+eviction. They form a closed-loop system, i.e., the page reclaim.
+
+Aging
+-----
+The aging produces young generations. Given an ``lruvec``, it
+increments ``max_seq`` when ``max_seq-min_seq+1`` approaches
+``MIN_NR_GENS``. The aging promotes hot pages to the youngest
+generation when it finds them accessed through page tables; the
+demotion of cold pages happens consequently when it increments
+``max_seq``. The aging uses page table walks and rmap walks to find
+young PTEs. For the former, it iterates ``lruvec_memcg()->mm_list``
+and calls ``walk_page_range()`` with each ``mm_struct`` on this list
+to scan PTEs, and after each iteration, it increments ``max_seq``. For
+the latter, when the eviction walks the rmap and finds a young PTE,
+the aging scans the adjacent PTEs. For both, on finding a young PTE,
+the aging clears the accessed bit and updates the gen counter of the
+page mapped by this PTE to ``(max_seq%MAX_NR_GENS)+1``.
+
+Eviction
+--------
+The eviction consumes old generations. Given an ``lruvec``, it
+increments ``min_seq`` when ``lrugen->lists[]`` indexed by
+``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to
+evict from, it first compares ``min_seq[]`` to select the older type.
+If both types are equally old, it selects the one whose first tier has
+a lower refault percentage. The first tier contains single-use
+unmapped clean pages, which are the best bet. The eviction sorts a
+page according to its gen counter if the aging has found this page
+accessed through page tables and updated its gen counter. It also
+moves a page to the next generation, i.e., ``min_seq+1``, if this page
+was accessed multiple times through file descriptors and the feedback
+loop has detected outlying refaults from the tier this page is in. To
+this end, the feedback loop uses the first tier as the baseline, for
+the reason stated earlier.
+
+Summary
+-------
+The multi-gen LRU can be disassembled into the following parts:
+
+* Generations
+* Rmap walks
+* Page table walks
+* Bloom filters
+* PID controller
+
+The aging and the eviction form a producer-consumer model;
+specifically, the latter drives the former by the sliding window over
+generations. Within the aging, rmap walks drive page table walks by
+inserting hot densely populated page tables to the Bloom filters.
+Within the eviction, the PID controller uses refaults as the feedback
+to select types to evict and tiers to protect.

View File

@ -3534,9 +3534,7 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_MPL115_SPI is not set
# CONFIG_MPL3115 is not set
# CONFIG_MPLS is not set
CONFIG_MPTCP=y
CONFIG_INET_MPTCP_DIAG=m
CONFIG_MPTCP_IPV6=y
# CONFIG_MPTCP is not set
# CONFIG_MPU3050_I2C is not set
# CONFIG_MQ_IOSCHED_DEADLINE is not set
# CONFIG_MQ_IOSCHED_KYBER is not set

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,174 @@
From a779a482fb9b9f8fcdf8b2519c789b4b9bb5dd05 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 Jul 2017 16:56:48 +0200
Subject: build: add a hack for removing non-essential module info
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
include/linux/module.h | 13 ++++++++-----
include/linux/moduleparam.h | 15 ++++++++++++---
init/Kconfig | 7 +++++++
kernel/module.c | 5 ++++-
scripts/mod/modpost.c | 12 ++++++++++++
5 files changed, 43 insertions(+), 9 deletions(-)
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -164,6 +164,7 @@ extern void cleanup_module(void);
/* Generic info of form tag = "info" */
#define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info)
+#define MODULE_INFO_STRIP(tag, info) __MODULE_INFO_STRIP(tag, tag, info)
/* For userspace: you can also call me... */
#define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias)
@@ -233,12 +234,12 @@ extern void cleanup_module(void);
* Author(s), use "Name <email>" or just "Name", for multiple
* authors use multiple MODULE_AUTHOR() statements/lines.
*/
-#define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)
+#define MODULE_AUTHOR(_author) MODULE_INFO_STRIP(author, _author)
/* What your module does. */
-#define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description)
+#define MODULE_DESCRIPTION(_description) MODULE_INFO_STRIP(description, _description)
-#ifdef MODULE
+#if defined(MODULE) && !defined(CONFIG_MODULE_STRIPPED)
/* Creates an alias so file2alias.c can find device table. */
#define MODULE_DEVICE_TABLE(type, name) \
extern typeof(name) __mod_##type##__##name##_device_table \
@@ -265,7 +266,9 @@ extern typeof(name) __mod_##type##__##na
*/
#if defined(MODULE) || !defined(CONFIG_SYSFS)
-#define MODULE_VERSION(_version) MODULE_INFO(version, _version)
+#define MODULE_VERSION(_version) MODULE_INFO_STRIP(version, _version)
+#elif defined(CONFIG_MODULE_STRIPPED)
+#define MODULE_VERSION(_version) __MODULE_INFO_DISABLED(version)
#else
#define MODULE_VERSION(_version) \
MODULE_INFO(version, _version); \
@@ -288,7 +291,7 @@ extern typeof(name) __mod_##type##__##na
/* Optional firmware file (or files) needed by the module
* format is simply firmware file name. Multiple firmware
* files require multiple MODULE_FIRMWARE() specifiers */
-#define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware)
+#define MODULE_FIRMWARE(_firmware) MODULE_INFO_STRIP(firmware, _firmware)
#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, __stringify(ns))
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -20,6 +20,16 @@
/* Chosen so that structs with an unsigned long line up. */
#define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long))
+/* This struct is here for syntactic coherency, it is not used */
+#define __MODULE_INFO_DISABLED(name) \
+ struct __UNIQUE_ID(name) {}
+
+#ifdef CONFIG_MODULE_STRIPPED
+#define __MODULE_INFO_STRIP(tag, name, info) __MODULE_INFO_DISABLED(name)
+#else
+#define __MODULE_INFO_STRIP(tag, name, info) __MODULE_INFO(tag, name, info)
+#endif
+
#define __MODULE_INFO(tag, name, info) \
static const char __UNIQUE_ID(name)[] \
__used __section(".modinfo") __aligned(1) \
@@ -31,7 +41,7 @@
/* One for each parameter, describing how to use it. Some files do
multiple of these per line, so can't just use MODULE_INFO. */
#define MODULE_PARM_DESC(_parm, desc) \
- __MODULE_INFO(parm, _parm, #_parm ":" desc)
+ __MODULE_INFO_STRIP(parm, _parm, #_parm ":" desc)
struct kernel_param;
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -286,6 +286,13 @@ config UNUSED_KSYMS_WHITELIST
one per line. The path can be absolute, or relative to the kernel
source tree.
+config MODULE_STRIPPED
+ bool "Reduce module size"
+ depends on MODULES
+ help
+ Remove module parameter descriptions, author info, version, aliases,
+ device tables, etc.
+
config MODULES_TREE_LOOKUP
def_bool y
depends on PERF_EVENTS || TRACING || CFI_CLANG
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1954,9 +1954,11 @@ static int setup_load_info(struct load_i
static int check_modinfo(struct module *mod, struct load_info *info, int flags)
{
- const char *modmagic = get_modinfo(info, "vermagic");
int err;
+#ifndef CONFIG_MODULE_STRIPPED
+ const char *modmagic = get_modinfo(info, "vermagic");
+
if (flags & MODULE_INIT_IGNORE_VERMAGIC)
modmagic = NULL;
@@ -1977,6 +1979,7 @@ static int check_modinfo(struct module *
mod->name);
add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
}
+#endif
check_modinfo_retpoline(mod, info);
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1817,7 +1817,9 @@ static void read_symbols(const char *mod
symname = remove_dot(info.strtab + sym->st_name);
handle_symbol(mod, &info, sym, symname);
+#ifndef CONFIG_MODULE_STRIPPED
handle_moddevtable(mod, &info, sym, symname);
+#endif
}
for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
@@ -1980,8 +1982,10 @@ static void add_header(struct buffer *b,
buf_printf(b, "BUILD_SALT;\n");
buf_printf(b, "BUILD_LTO_INFO;\n");
buf_printf(b, "\n");
+#ifndef CONFIG_MODULE_STRIPPED
buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n");
buf_printf(b, "MODULE_INFO(name, KBUILD_MODNAME);\n");
+#endif
buf_printf(b, "\n");
buf_printf(b, "__visible struct module __this_module\n");
buf_printf(b, "__section(\".gnu.linkonce.this_module\") = {\n");
@@ -2101,11 +2105,13 @@ static void add_depends(struct buffer *b
static void add_srcversion(struct buffer *b, struct module *mod)
{
+#ifndef CONFIG_MODULE_STRIPPED
if (mod->srcversion[0]) {
buf_printf(b, "\n");
buf_printf(b, "MODULE_INFO(srcversion, \"%s\");\n",
mod->srcversion);
}
+#endif
}
static void write_buf(struct buffer *b, const char *fname)
@@ -2191,7 +2197,9 @@ static void write_mod_c_file(struct modu
add_exported_symbols(&buf, mod);
add_versions(&buf, mod);
add_depends(&buf, mod);
+#ifndef CONFIG_MODULE_STRIPPED
add_moddevtable(&buf, mod);
+#endif
add_srcversion(&buf, mod);
ret = snprintf(fname, sizeof(fname), "%s.mod.c", mod->name);

View File

@ -0,0 +1,11 @@
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -432,6 +432,8 @@ static int conf_sym(struct menu *menu)
break;
continue;
case 0:
+ if (!sym_has_value(sym) && !tty_stdio && getenv("FAIL_ON_UNCONFIGURED"))
+ exit(1);
newval = oldval;
break;
case '?':

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,22 @@
From e44fc2af1ddc452b6659d08c16973d65c73b7d0a Mon Sep 17 00:00:00 2001
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Date: Wed, 5 Feb 2020 18:36:43 +0000
Subject: [PATCH] file2alias: build on macos
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
---
scripts/mod/file2alias.c | 3 +++
1 file changed, 3 insertions(+)
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -38,6 +38,9 @@ typedef struct {
__u8 b[16];
} guid_t;
+#ifdef __APPLE__
+#define uuid_t compat_uuid_t
+#endif
/* backwards compatibility, don't use in new code */
typedef struct {
__u8 b[16];

View File

@ -0,0 +1,83 @@
From 48232d3d931c95953ce2ddfe7da7bb164aef6a73 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 Jul 2017 17:03:16 +0200
Subject: fix portability of some includes files in tools/ used on the host
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
tools/include/tools/be_byteshift.h | 4 ++++
tools/include/tools/le_byteshift.h | 4 ++++
tools/include/tools/linux_types.h | 22 ++++++++++++++++++++++
3 files changed, 30 insertions(+)
create mode 100644 tools/include/tools/linux_types.h
--- a/tools/include/tools/be_byteshift.h
+++ b/tools/include/tools/be_byteshift.h
@@ -2,6 +2,10 @@
#ifndef _TOOLS_BE_BYTESHIFT_H
#define _TOOLS_BE_BYTESHIFT_H
+#ifndef __linux__
+#include "linux_types.h"
+#endif
+
#include <stdint.h>
static inline uint16_t __get_unaligned_be16(const uint8_t *p)
--- a/tools/include/tools/le_byteshift.h
+++ b/tools/include/tools/le_byteshift.h
@@ -2,6 +2,10 @@
#ifndef _TOOLS_LE_BYTESHIFT_H
#define _TOOLS_LE_BYTESHIFT_H
+#ifndef __linux__
+#include "linux_types.h"
+#endif
+
#include <stdint.h>
static inline uint16_t __get_unaligned_le16(const uint8_t *p)
--- /dev/null
+++ b/tools/include/tools/linux_types.h
@@ -0,0 +1,26 @@
+#ifndef __LINUX_TYPES_H
+#define __LINUX_TYPES_H
+
+#include <stdint.h>
+
+typedef int8_t __s8;
+typedef uint8_t __u8;
+typedef uint8_t __be8;
+typedef uint8_t __le8;
+
+typedef int16_t __s16;
+typedef uint16_t __u16;
+typedef uint16_t __be16;
+typedef uint16_t __le16;
+
+typedef int32_t __s32;
+typedef uint32_t __u32;
+typedef uint32_t __be32;
+typedef uint32_t __le32;
+
+typedef int64_t __s64;
+typedef uint64_t __u64;
+typedef uint64_t __be64;
+typedef uint64_t __le64;
+
+#endif
--- a/tools/include/linux/types.h
+++ b/tools/include/linux/types.h
@@ -10,8 +10,12 @@
#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
#endif
+#ifndef __linux__
+#include <tools/linux_types.h>
+#else
#include <asm/types.h>
#include <asm/posix_types.h>
+#endif
struct page;
struct kmem_cache;

View File

@ -0,0 +1,24 @@
From be9be95ff10e16a5b4ad36f903978d0cc5747024 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 Jul 2017 17:04:08 +0200
Subject: kernel: fix linux/spi/spidev.h portability issues with musl
Felix will try to get this define included into musl
lede-commit: 795e7cf60de19e7a076a46874fab7bb88b43bbff
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
include/uapi/linux/spi/spidev.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/include/uapi/linux/spi/spidev.h
+++ b/include/uapi/linux/spi/spidev.h
@@ -93,7 +93,7 @@ struct spi_ioc_transfer {
/* not all platforms use <asm-generic/ioctl.h> or _IOC_TYPECHECK() ... */
#define SPI_MSGSIZE(N) \
- ((((N)*(sizeof (struct spi_ioc_transfer))) < (1 << _IOC_SIZEBITS)) \
+ ((((N)*(sizeof (struct spi_ioc_transfer))) < (1 << 13)) \
? ((N)*(sizeof (struct spi_ioc_transfer))) : 0)
#define SPI_IOC_MESSAGE(N) _IOW(SPI_IOC_MAGIC, 0, char[SPI_MSGSIZE(N)])

View File

@ -0,0 +1,123 @@
From e3d8676f5722b7622685581e06e8f53e6138e3ab Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 15 Jul 2017 23:42:36 +0200
Subject: use -ffunction-sections, -fdata-sections and --gc-sections
In combination with kernel symbol export stripping this significantly reduces
the kernel image size. Used on both ARM and MIPS architectures.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Jonas Gorski <jogo@openwrt.org>
Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
---
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -121,6 +121,7 @@ config ARM
select HAVE_UID16
select HAVE_VIRT_CPU_ACCOUNTING_GEN
select IRQ_FORCED_THREADING
+ select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
select MODULES_USE_ELF_REL
select NEED_DMA_MAP_STATE
select OF_EARLY_FLATTREE if OF
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -91,6 +91,7 @@ endif
ifeq ($(CONFIG_USE_OF),y)
OBJS += $(libfdt_objs) fdt_check_mem_start.o
endif
+KBUILD_CFLAGS_KERNEL := $(patsubst -f%-sections,,$(KBUILD_CFLAGS_KERNEL))
OBJS += lib1funcs.o ashldi3.o bswapsdi2.o
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -75,7 +75,7 @@ SECTIONS
. = ALIGN(4);
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
__start___ex_table = .;
- ARM_MMU_KEEP(*(__ex_table))
+ KEEP(*(__ex_table))
__stop___ex_table = .;
}
@@ -100,24 +100,24 @@ SECTIONS
}
.init.arch.info : {
__arch_info_begin = .;
- *(.arch.info.init)
+ KEEP(*(.arch.info.init))
__arch_info_end = .;
}
.init.tagtable : {
__tagtable_begin = .;
- *(.taglist.init)
+ KEEP(*(.taglist.init))
__tagtable_end = .;
}
#ifdef CONFIG_SMP_ON_UP
.init.smpalt : {
__smpalt_begin = .;
- *(.alt.smp.init)
+ KEEP(*(.alt.smp.init))
__smpalt_end = .;
}
#endif
.init.pv_table : {
__pv_table_begin = .;
- *(.pv_table)
+ KEEP(*(.pv_table))
__pv_table_end = .;
}
--- a/arch/arm/include/asm/vmlinux.lds.h
+++ b/arch/arm/include/asm/vmlinux.lds.h
@@ -42,13 +42,13 @@
#define PROC_INFO \
. = ALIGN(4); \
__proc_info_begin = .; \
- *(.proc.info.init) \
+ KEEP(*(.proc.info.init)) \
__proc_info_end = .;
#define IDMAP_TEXT \
ALIGN_FUNCTION(); \
__idmap_text_start = .; \
- *(.idmap.text) \
+ KEEP(*(.idmap.text)) \
__idmap_text_end = .; \
#define ARM_DISCARD \
@@ -109,12 +109,12 @@
. = ALIGN(8); \
.ARM.unwind_idx : { \
__start_unwind_idx = .; \
- *(.ARM.exidx*) \
+ KEEP(*(.ARM.exidx*)) \
__stop_unwind_idx = .; \
} \
.ARM.unwind_tab : { \
__start_unwind_tab = .; \
- *(.ARM.extab*) \
+ KEEP(*(.ARM.extab*)) \
__stop_unwind_tab = .; \
}
@@ -126,7 +126,7 @@
__vectors_lma = .; \
OVERLAY 0xffff0000 : NOCROSSREFS AT(__vectors_lma) { \
.vectors { \
- *(.vectors) \
+ KEEP(*(.vectors)) \
} \
.vectors.bhb.loop8 { \
*(.vectors.bhb.loop8) \
@@ -144,7 +144,7 @@
\
__stubs_lma = .; \
.stubs ADDR(.vectors) + 0x1000 : AT(__stubs_lma) { \
- *(.stubs) \
+ KEEP(*(.stubs)) \
} \
ARM_LMA(__stubs, .stubs); \
. = __stubs_lma + SIZEOF(.stubs); \

View File

@ -0,0 +1,102 @@
From b14784e7883390c20ed3ff904892255404a5914b Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 Jul 2017 17:05:53 +0200
Subject: add an optional config option for stripping all unnecessary symbol exports from the kernel image
lede-commit: bb5a40c64b7c4f4848509fa0a6625055fc9e66cc
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
include/asm-generic/vmlinux.lds.h | 18 +++++++++++++++---
include/linux/export.h | 9 ++++++++-
scripts/Makefile.build | 2 +-
3 files changed, 24 insertions(+), 5 deletions(-)
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -81,6 +81,16 @@
#define RO_EXCEPTION_TABLE
#endif
+#ifndef SYMTAB_KEEP
+#define SYMTAB_KEEP KEEP(*(SORT(___ksymtab+*)))
+#define SYMTAB_KEEP_GPL KEEP(*(SORT(___ksymtab_gpl+*)))
+#endif
+
+#ifndef SYMTAB_DISCARD
+#define SYMTAB_DISCARD
+#define SYMTAB_DISCARD_GPL
+#endif
+
/* Align . to a 8 byte boundary equals to maximum function alignment. */
#define ALIGN_FUNCTION() . = ALIGN(8)
@@ -478,14 +488,14 @@
/* Kernel symbol table: Normal symbols */ \
__ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \
__start___ksymtab = .; \
- KEEP(*(SORT(___ksymtab+*))) \
+ SYMTAB_KEEP \
__stop___ksymtab = .; \
} \
\
/* Kernel symbol table: GPL-only symbols */ \
__ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \
__start___ksymtab_gpl = .; \
- KEEP(*(SORT(___ksymtab_gpl+*))) \
+ SYMTAB_KEEP_GPL \
__stop___ksymtab_gpl = .; \
} \
\
@@ -505,7 +515,7 @@
\
/* Kernel symbol table: strings */ \
__ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
- *(__ksymtab_strings) \
+ *(__ksymtab_strings+*) \
} \
\
/* __*init sections */ \
@@ -1023,6 +1033,8 @@
#define COMMON_DISCARDS \
SANITIZER_DISCARDS \
+ SYMTAB_DISCARD \
+ SYMTAB_DISCARD_GPL \
*(.discard) \
*(.discard.*) \
*(.modinfo) \
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -72,6 +72,12 @@ struct kernel_symbol {
#else
+#ifdef MODULE
+#define __EXPORT_SUFFIX(sym)
+#else
+#define __EXPORT_SUFFIX(sym) "+" #sym
+#endif
+
/*
* For every exported symbol, do the following:
*
@@ -87,7 +93,7 @@ struct kernel_symbol {
extern typeof(sym) sym; \
extern const char __kstrtab_##sym[]; \
extern const char __kstrtabns_##sym[]; \
- asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
+ asm(" .section \"__ksymtab_strings" __EXPORT_SUFFIX(sym) "\",\"aMS\",%progbits,1 \n" \
"__kstrtab_" #sym ": \n" \
" .asciz \"" #sym "\" \n" \
"__kstrtabns_" #sym ": \n" \
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -328,7 +328,7 @@ targets += $(real-dtb-y) $(lib-y) $(alwa
# Linker scripts preprocessor (.lds.S -> .lds)
# ---------------------------------------------------------------------------
quiet_cmd_cpp_lds_S = LDS $@
- cmd_cpp_lds_S = $(CPP) $(cpp_flags) -P -U$(ARCH) \
+ cmd_cpp_lds_S = $(CPP) $(EXTRA_LDSFLAGS) $(cpp_flags) -P -U$(ARCH) \
-D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $<
$(obj)/%.lds: $(src)/%.lds.S FORCE

View File

@ -0,0 +1,34 @@
From b3d00b452467f621317953d9e4c6f9ae8dcfd271 Mon Sep 17 00:00:00 2001
From: Imre Kaloz <kaloz@openwrt.org>
Date: Fri, 7 Jul 2017 17:06:55 +0200
Subject: use the openwrt lzma options for now
lede-commit: 548de949f392049420a6a1feeef118b30ab8ea8c
Signed-off-by: Imre Kaloz <kaloz@openwrt.org>
---
lib/decompress.c | 1 +
scripts/Makefile.lib | 2 +-
usr/gen_initramfs_list.sh | 10 +++++-----
3 files changed, 7 insertions(+), 6 deletions(-)
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -53,6 +53,7 @@ static const struct compress_format comp
{ {0x1f, 0x9e}, "gzip", gunzip },
{ {0x42, 0x5a}, "bzip2", bunzip2 },
{ {0x5d, 0x00}, "lzma", unlzma },
+ { {0x6d, 0x00}, "lzma-openwrt", unlzma },
{ {0xfd, 0x37}, "xz", unxz },
{ {0x89, 0x4c}, "lzo", unlzo },
{ {0x02, 0x21}, "lz4", unlz4 },
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -421,7 +421,7 @@ quiet_cmd_bzip2_with_size = BZIP2 $@
# ---------------------------------------------------------------------------
quiet_cmd_lzma = LZMA $@
- cmd_lzma = cat $(real-prereqs) | $(LZMA) -9 > $@
+ cmd_lzma = { cat $(real-prereqs) | $(LZMA) e -d20 -lc1 -lp2 -pb2 -eos -si -so; $(size_append); } > $@
quiet_cmd_lzma_with_size = LZMA $@
cmd_lzma_with_size = { cat $(real-prereqs) | $(LZMA) -9; $(size_append); } > $@

View File

@ -0,0 +1,11 @@
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -315,7 +315,7 @@ config NET_IPVTI
on top.
config NET_UDP_TUNNEL
- tristate
+ tristate "IP: UDP tunneling support"
select NET_IP_TUNNEL
default n

View File

@ -0,0 +1,27 @@
From: Felix Fietkau <nbd@nbd.name>
Subject: hack: net: remove bogus netfilter dependencies
lede-commit: 589d2a377dee27d206fc3725325309cf649e4df6
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
net/netfilter/Kconfig | 2 --
1 file changed, 2 deletions(-)
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -253,7 +253,6 @@ config NF_CONNTRACK_FTP
config NF_CONNTRACK_H323
tristate "H.323 protocol support"
- depends on IPV6 || IPV6=n
depends on NETFILTER_ADVANCED
help
H.323 is a VoIP signalling protocol from ITU-T. As one of the most
@@ -1118,7 +1117,6 @@ config NETFILTER_XT_TARGET_SECMARK
config NETFILTER_XT_TARGET_TCPMSS
tristate '"TCPMSS" target support'
- depends on IPV6 || IPV6=n
default m if NETFILTER_ADVANCED=n
help
This option adds a `TCPMSS' target, which allows you to alter the

View File

@ -0,0 +1,199 @@
From da3c50704f14132f4adf80d48e9a4cd5d46e54c9 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 7 Jul 2017 17:09:21 +0200
Subject: kconfig: owrt specifc dependencies
Signed-off-by: John Crispin <john@phrozen.org>
---
crypto/Kconfig | 10 +++++-----
drivers/bcma/Kconfig | 1 +
drivers/ssb/Kconfig | 3 ++-
lib/Kconfig | 8 ++++----
net/netfilter/Kconfig | 2 +-
net/wireless/Kconfig | 17 ++++++++++-------
sound/core/Kconfig | 4 ++--
7 files changed, 25 insertions(+), 20 deletions(-)
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -55,7 +55,7 @@ config CRYPTO_FIPS_VERSION
By default the KERNELRELEASE value is used.
config CRYPTO_ALGAPI
- tristate
+ tristate "ALGAPI"
select CRYPTO_ALGAPI2
help
This option provides the API for cryptographic algorithms.
@@ -64,7 +64,7 @@ config CRYPTO_ALGAPI2
tristate
config CRYPTO_AEAD
- tristate
+ tristate "AEAD"
select CRYPTO_AEAD2
select CRYPTO_ALGAPI
@@ -75,7 +75,7 @@ config CRYPTO_AEAD2
select CRYPTO_RNG2
config CRYPTO_SKCIPHER
- tristate
+ tristate "SKCIPHER"
select CRYPTO_SKCIPHER2
select CRYPTO_ALGAPI
@@ -85,7 +85,7 @@ config CRYPTO_SKCIPHER2
select CRYPTO_RNG2
config CRYPTO_HASH
- tristate
+ tristate "HASH"
select CRYPTO_HASH2
select CRYPTO_ALGAPI
@@ -94,7 +94,7 @@ config CRYPTO_HASH2
select CRYPTO_ALGAPI2
config CRYPTO_RNG
- tristate
+ tristate "RNG"
select CRYPTO_RNG2
select CRYPTO_ALGAPI
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -16,6 +16,7 @@ if BCMA
# Support for Block-I/O. SELECT this from the driver that needs it.
config BCMA_BLOCKIO
bool
+ default y
config BCMA_HOST_PCI_POSSIBLE
bool
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -29,6 +29,7 @@ config SSB_SPROM
config SSB_BLOCKIO
bool
depends on SSB
+ default y
config SSB_PCIHOST_POSSIBLE
bool
@@ -49,7 +50,7 @@ config SSB_PCIHOST
config SSB_B43_PCI_BRIDGE
bool
depends on SSB_PCIHOST
- default n
+ default y
config SSB_PCMCIAHOST_POSSIBLE
bool
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -456,16 +456,16 @@ config BCH_CONST_T
# Textsearch support is select'ed if needed
#
config TEXTSEARCH
- bool
+ bool "Textsearch support"
config TEXTSEARCH_KMP
- tristate
+ tristate "Textsearch KMP"
config TEXTSEARCH_BM
- tristate
+ tristate "Textsearch BM"
config TEXTSEARCH_FSM
- tristate
+ tristate "Textsearch FSM"
config BTREE
bool
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -22,7 +22,7 @@ config NETFILTER_SKIP_EGRESS
def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB)
config NETFILTER_NETLINK
- tristate
+ tristate "Netfilter NFNETLINK interface"
config NETFILTER_FAMILY_BRIDGE
bool
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config WIRELESS_EXT
- bool
+ bool "Wireless extensions"
config WEXT_CORE
def_bool y
@@ -12,10 +12,10 @@ config WEXT_PROC
depends on WEXT_CORE
config WEXT_SPY
- bool
+ bool "WEXT_SPY"
config WEXT_PRIV
- bool
+ bool "WEXT_PRIV"
config CFG80211
tristate "cfg80211 - wireless configuration API"
@@ -204,7 +204,7 @@ config CFG80211_WEXT_EXPORT
endif # CFG80211
config LIB80211
- tristate
+ tristate "LIB80211"
default n
help
This options enables a library of common routines used
@@ -213,17 +213,17 @@ config LIB80211
Drivers should select this themselves if needed.
config LIB80211_CRYPT_WEP
- tristate
+ tristate "LIB80211_CRYPT_WEP"
select CRYPTO_LIB_ARC4
config LIB80211_CRYPT_CCMP
- tristate
+ tristate "LIB80211_CRYPT_CCMP"
select CRYPTO
select CRYPTO_AES
select CRYPTO_CCM
config LIB80211_CRYPT_TKIP
- tristate
+ tristate "LIB80211_CRYPT_TKIP"
select CRYPTO_LIB_ARC4
config LIB80211_DEBUG
--- a/sound/core/Kconfig
+++ b/sound/core/Kconfig
@@ -17,7 +17,7 @@ config SND_DMAENGINE_PCM
tristate
config SND_HWDEP
- tristate
+ tristate "Sound hardware support"
config SND_SEQ_DEVICE
tristate
@@ -27,7 +27,7 @@ config SND_RAWMIDI
select SND_SEQ_DEVICE if SND_SEQUENCER != n
config SND_COMPRESS_OFFLOAD
- tristate
+ tristate "Compression offloading support"
config SND_JACK
bool

View File

@ -0,0 +1,23 @@
From 8c817e33be829c7249c2cfd59ff48ad5fac6a31d Mon Sep 17 00:00:00 2001
From: Sungbo Eo <mans0n@gorani.run>
Date: Fri, 7 Jul 2017 17:09:21 +0200
Subject: [PATCH] kconfig: solidify SATA_PMP config
SATA_PMP option in kernel config file disappears for every kernel_oldconfig refresh.
To prevent this, SATA_HOST is now selected automatically when SATA_PMP is enabled.
This patch can be dropped if SATA_MV is ever re-added into the config.
---
drivers/ata/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -112,7 +112,7 @@ config SATA_ZPODD
config SATA_PMP
bool "SATA Port Multiplier support"
- depends on SATA_HOST
+ select SATA_HOST
default y
help
This option adds support for SATA Port Multipliers

View File

@ -0,0 +1,22 @@
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1954,7 +1954,7 @@ config PADATA
bool
config ASN1
- tristate
+ tristate "ASN1"
help
Build a simple ASN.1 grammar compiler that produces a bytecode output
that can be interpreted by the ASN.1 stream decoder and used to
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -627,7 +627,7 @@ config LIBFDT
bool
config OID_REGISTRY
- tristate
+ tristate "OID"
help
Enable fast lookup object identifier registry.

View File

@ -0,0 +1,144 @@
From 811d9e2268a62b830cfe93cd8bc929afcb8b198b Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 15 Jul 2017 21:12:38 +0200
Subject: kernel: move regmap bloat out of the kernel image if it is only being used in modules
lede-commit: 96f39119815028073583e4fca3a9c5fe9141e998
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
drivers/base/regmap/Kconfig | 15 ++++++++++-----
drivers/base/regmap/Makefile | 12 ++++++++----
drivers/base/regmap/regmap.c | 3 +++
include/linux/regmap.h | 2 +-
4 files changed, 22 insertions(+), 10 deletions(-)
--- a/drivers/base/regmap/Kconfig
+++ b/drivers/base/regmap/Kconfig
@@ -4,10 +4,9 @@
# subsystems should select the appropriate symbols.
config REGMAP
- default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SOUNDWIRE || REGMAP_SOUNDWIRE_MBQ || REGMAP_SCCB || REGMAP_I3C || REGMAP_SPI_AVMM || REGMAP_MDIO)
select IRQ_DOMAIN if REGMAP_IRQ
select MDIO_BUS if REGMAP_MDIO
- bool
+ tristate
config REGCACHE_COMPRESSED
select LZO_COMPRESS
@@ -15,53 +14,67 @@ config REGCACHE_COMPRESSED
bool
config REGMAP_AC97
+ select REGMAP
tristate
config REGMAP_I2C
+ select REGMAP
tristate
depends on I2C
config REGMAP_SLIMBUS
+ select REGMAP
tristate
depends on SLIMBUS
config REGMAP_SPI
+ select REGMAP
tristate
depends on SPI
config REGMAP_SPMI
+ select REGMAP
tristate
depends on SPMI
config REGMAP_W1
+ select REGMAP
tristate
depends on W1
config REGMAP_MDIO
+ select REGMAP
tristate
config REGMAP_MMIO
+ select REGMAP
tristate
config REGMAP_IRQ
+ select REGMAP
bool
config REGMAP_SOUNDWIRE
+ select REGMAP
tristate
depends on SOUNDWIRE
config REGMAP_SOUNDWIRE_MBQ
+ select REGMAP
tristate
depends on SOUNDWIRE
config REGMAP_SCCB
+ select REGMAP
tristate
depends on I2C
config REGMAP_I3C
+ select REGMAP
tristate
depends on I3C
config REGMAP_SPI_AVMM
+ select REGMAP
tristate
depends on SPI
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -2,10 +2,14 @@
# For include/trace/define_trace.h to include trace.h
CFLAGS_regmap.o := -I$(src)
-obj-$(CONFIG_REGMAP) += regmap.o regcache.o
-obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-flat.o
-obj-$(CONFIG_REGCACHE_COMPRESSED) += regcache-lzo.o
-obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
+regmap-core-objs = regmap.o regcache.o regcache-rbtree.o regcache-flat.o
+ifdef CONFIG_DEBUG_FS
+regmap-core-objs += regmap-debugfs.o
+endif
+ifdef CONFIG_REGCACHE_COMPRESSED
+regmap-core-objs += regcache-lzo.o
+endif
+obj-$(CONFIG_REGMAP) += regmap-core.o
obj-$(CONFIG_REGMAP_AC97) += regmap-ac97.o
obj-$(CONFIG_REGMAP_I2C) += regmap-i2c.o
obj-$(CONFIG_REGMAP_SLIMBUS) += regmap-slimbus.o
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -9,6 +9,7 @@
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/err.h>
#include <linux/property.h>
@@ -3384,3 +3385,5 @@ static int __init regmap_initcall(void)
return 0;
}
postcore_initcall(regmap_initcall);
+
+MODULE_LICENSE("GPL");
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -180,7 +180,7 @@ struct reg_sequence {
__ret ?: __tmp; \
})
-#ifdef CONFIG_REGMAP
+#if IS_REACHABLE(CONFIG_REGMAP)
enum regmap_endian {
/* Unspecified -> 0 -> Backwards compatible default */

View File

@ -0,0 +1,52 @@
From fd1799b0bf5efa46dd3e6dfbbf3955564807e508 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 Jul 2017 17:12:51 +0200
Subject: kernel: prevent cryptomgr from pulling in useless extra dependencies for tests that are not run
Reduces kernel size after LZMA by about 5k on MIPS
lede-commit: 044c316167e076479a344c59905e5b435b84a77f
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
crypto/Kconfig | 13 ++++++-------
crypto/algboss.c | 4 ++++
2 files changed, 10 insertions(+), 7 deletions(-)
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -142,13 +142,13 @@ config CRYPTO_MANAGER
cbc(aes).
config CRYPTO_MANAGER2
- def_tristate CRYPTO_MANAGER || (CRYPTO_MANAGER!=n && CRYPTO_ALGAPI=y)
- select CRYPTO_AEAD2
- select CRYPTO_HASH2
- select CRYPTO_SKCIPHER2
- select CRYPTO_AKCIPHER2
- select CRYPTO_KPP2
- select CRYPTO_ACOMP2
+ def_tristate CRYPTO_MANAGER || (CRYPTO_MANAGER!=n && CRYPTO_ALGAPI=y && !CRYPTO_MANAGER_DISABLE_TESTS)
+ select CRYPTO_AEAD2 if !CRYPTO_MANAGER_DISABLE_TESTS
+ select CRYPTO_HASH2 if !CRYPTO_MANAGER_DISABLE_TESTS
+ select CRYPTO_SKCIPHER2 if !CRYPTO_MANAGER_DISABLE_TESTS
+ select CRYPTO_AKCIPHER2 if !CRYPTO_MANAGER_DISABLE_TESTS
+ select CRYPTO_KPP2 if !CRYPTO_MANAGER_DISABLE_TESTS
+ select CRYPTO_ACOMP2 if !CRYPTO_MANAGER_DISABLE_TESTS
config CRYPTO_USER
tristate "Userspace cryptographic algorithm configuration"
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -211,8 +211,12 @@ static int cryptomgr_schedule_test(struc
type = alg->cra_flags;
/* Do not test internal algorithms. */
+#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
+ type |= CRYPTO_ALG_TESTED;
+#else
if (type & CRYPTO_ALG_INTERNAL)
type |= CRYPTO_ALG_TESTED;
+#endif
param->type = type;

View File

@ -0,0 +1,15 @@
This makes it possible to select CONFIG_CRYPTO_LIB_ARC4 directly. We
need this to be able to compile this into the kernel and make use of it
from backports.
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -6,7 +6,7 @@ config CRYPTO_LIB_AES
tristate
config CRYPTO_LIB_ARC4
- tristate
+ tristate "ARC4 cipher library"
config CRYPTO_ARCH_HAVE_LIB_BLAKE2S
bool

View File

@ -0,0 +1,84 @@
From 236c1acdfef5958010ac9814a9872e0a46fd78ee Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 7 Jul 2017 17:13:44 +0200
Subject: rfkill: add fake rfkill support
allow building of modules depending on RFKILL even if RFKILL is not enabled.
Signed-off-by: John Crispin <john@phrozen.org>
---
include/linux/rfkill.h | 2 +-
net/Makefile | 2 +-
net/rfkill/Kconfig | 14 +++++++++-----
net/rfkill/Makefile | 2 +-
4 files changed, 12 insertions(+), 8 deletions(-)
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -64,7 +64,7 @@ struct rfkill_ops {
int (*set_block)(void *data, bool blocked);
};
-#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE)
+#if defined(CONFIG_RFKILL_FULL) || defined(CONFIG_RFKILL_FULL_MODULE)
/**
* rfkill_alloc - Allocate rfkill structure
* @name: name of the struct -- the string is not copied internally
--- a/net/Makefile
+++ b/net/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_TIPC) += tipc/
obj-$(CONFIG_NETLABEL) += netlabel/
obj-$(CONFIG_IUCV) += iucv/
obj-$(CONFIG_SMC) += smc/
-obj-$(CONFIG_RFKILL) += rfkill/
+obj-$(CONFIG_RFKILL_FULL) += rfkill/
obj-$(CONFIG_NET_9P) += 9p/
obj-$(CONFIG_CAIF) += caif/
obj-$(CONFIG_DCB) += dcb/
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -2,7 +2,11 @@
#
# RF switch subsystem configuration
#
-menuconfig RFKILL
+config RFKILL
+ bool
+ default y
+
+menuconfig RFKILL_FULL
tristate "RF switch subsystem support"
help
Say Y here if you want to have control over RF switches
@@ -14,19 +18,19 @@ menuconfig RFKILL
# LED trigger support
config RFKILL_LEDS
bool
- depends on RFKILL
+ depends on RFKILL_FULL
depends on LEDS_TRIGGERS = y || RFKILL = LEDS_TRIGGERS
default y
config RFKILL_INPUT
bool "RF switch input support" if EXPERT
- depends on RFKILL
+ depends on RFKILL_FULL
depends on INPUT = y || RFKILL = INPUT
default y if !EXPERT
config RFKILL_GPIO
tristate "GPIO RFKILL driver"
- depends on RFKILL
+ depends on RFKILL_FULL
depends on GPIOLIB || COMPILE_TEST
default n
help
--- a/net/rfkill/Makefile
+++ b/net/rfkill/Makefile
@@ -5,5 +5,5 @@
rfkill-y += core.o
rfkill-$(CONFIG_RFKILL_INPUT) += input.o
-obj-$(CONFIG_RFKILL) += rfkill.o
+obj-$(CONFIG_RFKILL_FULL) += rfkill.o
obj-$(CONFIG_RFKILL_GPIO) += rfkill-gpio.o

View File

@ -0,0 +1,64 @@
From: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Date: Fri, 7 Jun 2013 18:35:22 -0500
Subject: MIPS: r4k_cache: use more efficient cache blast
Optimize the compiler output for larger cache blast cases that are
common for DMA-based networking.
Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -286,14 +286,46 @@ static inline void prot##extra##blast_##
unsigned long end) \
{ \
unsigned long lsize = cpu_##desc##_line_size(); \
+ unsigned long lsize_2 = lsize * 2; \
+ unsigned long lsize_3 = lsize * 3; \
+ unsigned long lsize_4 = lsize * 4; \
+ unsigned long lsize_5 = lsize * 5; \
+ unsigned long lsize_6 = lsize * 6; \
+ unsigned long lsize_7 = lsize * 7; \
+ unsigned long lsize_8 = lsize * 8; \
unsigned long addr = start & ~(lsize - 1); \
- unsigned long aend = (end - 1) & ~(lsize - 1); \
+ unsigned long aend = (end + lsize - 1) & ~(lsize - 1); \
+ int lines = (aend - addr) / lsize; \
\
- while (1) { \
+ while (lines >= 8) { \
+ prot##cache_op(hitop, addr); \
+ prot##cache_op(hitop, addr + lsize); \
+ prot##cache_op(hitop, addr + lsize_2); \
+ prot##cache_op(hitop, addr + lsize_3); \
+ prot##cache_op(hitop, addr + lsize_4); \
+ prot##cache_op(hitop, addr + lsize_5); \
+ prot##cache_op(hitop, addr + lsize_6); \
+ prot##cache_op(hitop, addr + lsize_7); \
+ addr += lsize_8; \
+ lines -= 8; \
+ } \
+ \
+ if (lines & 0x4) { \
+ prot##cache_op(hitop, addr); \
+ prot##cache_op(hitop, addr + lsize); \
+ prot##cache_op(hitop, addr + lsize_2); \
+ prot##cache_op(hitop, addr + lsize_3); \
+ addr += lsize_4; \
+ } \
+ \
+ if (lines & 0x2) { \
+ prot##cache_op(hitop, addr); \
+ prot##cache_op(hitop, addr + lsize); \
+ addr += lsize_2; \
+ } \
+ \
+ if (lines & 0x1) { \
prot##cache_op(hitop, addr); \
- if (addr == aend) \
- break; \
- addr += lsize; \
} \
}

View File

@ -0,0 +1,38 @@
From: John Crispin <john@phrozen.org>
Subject: hack: kernel: add generic image_cmdline hack to MIPS targets
lede-commit: d59f5b3a987a48508257a0ddbaeadc7909f9f976
Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
---
arch/mips/Kconfig | 4 ++++
arch/mips/kernel/head.S | 6 ++++++
2 files changed, 10 insertions(+)
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1112,6 +1112,10 @@ config MIPS_MSC
config SYNC_R4K
bool
+config IMAGE_CMDLINE_HACK
+ bool "OpenWrt specific image command line hack"
+ default n
+
config NO_IOPORT_MAP
def_bool n
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -79,6 +79,12 @@ FEXPORT(__kernel_entry)
j kernel_entry
#endif /* CONFIG_BOOT_RAW */
+#ifdef CONFIG_IMAGE_CMDLINE_HACK
+ .ascii "CMDLINE:"
+EXPORT(__image_cmdline)
+ .fill 0x400
+#endif /* CONFIG_IMAGE_CMDLINE_HACK */
+
__REF
NESTED(kernel_entry, 16, sp) # kernel entry point

View File

@ -0,0 +1,38 @@
From 107c0964cb8db7ca28ac5199426414fdab3c274d Mon Sep 17 00:00:00 2001
From: "Alexandros C. Couloumbis" <alex@ozo.com>
Date: Fri, 7 Jul 2017 17:14:51 +0200
Subject: hack: arch: powerpc: drop register save/restore library from modules
Upstream GCC uses a libgcc function for saving/restoring registers. This
makes the code bigger, and upstream kernels need to carry that function
for every single kernel module. Our GCC is patched to avoid those
references, so we can drop the extra bloat for modules.
lede-commit: e8e1084654f50904e6bf77b70b2de3f137d7b3ec
Signed-off-by: Alexandros C. Couloumbis <alex@ozo.com>
---
arch/powerpc/Makefile | 1 -
1 file changed, 1 deletion(-)
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -42,19 +42,6 @@ machine-$(CONFIG_PPC64) += 64
machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le
UTS_MACHINE := $(subst $(space),,$(machine-y))
-# XXX This needs to be before we override LD below
-ifdef CONFIG_PPC32
-KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
-else
-ifeq ($(call ld-ifversion, -ge, 22500, y),y)
-# Have the linker provide sfpr if possible.
-# There is a corresponding test in arch/powerpc/lib/Makefile
-KBUILD_LDFLAGS_MODULE += --save-restore-funcs
-else
-KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
-endif
-endif
-
ifdef CONFIG_CPU_LITTLE_ENDIAN
KBUILD_CFLAGS += -mlittle-endian
KBUILD_LDFLAGS += -EL

View File

@ -0,0 +1,196 @@
--- a/block/blk.h
+++ b/block/blk.h
@@ -406,6 +406,8 @@ void blk_free_ext_minor(unsigned int min
#define ADDPART_FLAG_NONE 0
#define ADDPART_FLAG_RAID 1
#define ADDPART_FLAG_WHOLEDISK 2
+#define ADDPART_FLAG_READONLY 4
+#define ADDPART_FLAG_ROOTDEV 8
int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
sector_t length);
int bdev_del_partition(struct gendisk *disk, int partno);
--- a/block/partitions/Kconfig
+++ b/block/partitions/Kconfig
@@ -103,6 +103,13 @@ config ATARI_PARTITION
Say Y here if you would like to use hard disks under Linux which
were partitioned under the Atari OS.
+config FIT_PARTITION
+ bool "Flattened-Image-Tree (FIT) partition support" if PARTITION_ADVANCED
+ default n
+ help
+ Say Y here if your system needs to mount the filesystem part of
+ a Flattened-Image-Tree (FIT) image commonly used with Das U-Boot.
+
config IBM_PARTITION
bool "IBM disk label and partition support"
depends on PARTITION_ADVANCED && S390
--- a/block/partitions/Makefile
+++ b/block/partitions/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_ACORN_PARTITION) += acorn.o
obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
obj-$(CONFIG_ATARI_PARTITION) += atari.o
obj-$(CONFIG_AIX_PARTITION) += aix.o
+obj-$(CONFIG_FIT_PARTITION) += fit.o
obj-$(CONFIG_CMDLINE_PARTITION) += cmdline.o
obj-$(CONFIG_MAC_PARTITION) += mac.o
obj-$(CONFIG_LDM_PARTITION) += ldm.o
--- a/block/partitions/check.h
+++ b/block/partitions/check.h
@@ -57,6 +57,7 @@ int amiga_partition(struct parsed_partit
int atari_partition(struct parsed_partitions *state);
int cmdline_partition(struct parsed_partitions *state);
int efi_partition(struct parsed_partitions *state);
+int fit_partition(struct parsed_partitions *state);
int ibm_partition(struct parsed_partitions *);
int karma_partition(struct parsed_partitions *state);
int ldm_partition(struct parsed_partitions *state);
@@ -67,3 +68,5 @@ int sgi_partition(struct parsed_partitio
int sun_partition(struct parsed_partitions *state);
int sysv68_partition(struct parsed_partitions *state);
int ultrix_partition(struct parsed_partitions *state);
+
+int parse_fit_partitions(struct parsed_partitions *state, u64 start_sector, u64 nr_sectors, int *slot, int add_remain);
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -10,6 +10,10 @@
#include <linux/ctype.h>
#include <linux/vmalloc.h>
#include <linux/raid/detect.h>
+#ifdef CONFIG_FIT_PARTITION
+#include <linux/root_dev.h>
+#endif
+
#include "check.h"
static int (*check_part[])(struct parsed_partitions *) = {
@@ -46,6 +50,9 @@ static int (*check_part[])(struct parsed
#ifdef CONFIG_EFI_PARTITION
efi_partition, /* this must come before msdos */
#endif
+#ifdef CONFIG_FIT_PARTITION
+ fit_partition,
+#endif
#ifdef CONFIG_SGI_PARTITION
sgi_partition,
#endif
@@ -398,6 +405,11 @@ static struct block_device *add_partitio
goto out_del;
}
+#ifdef CONFIG_FIT_PARTITION
+ if (flags & ADDPART_FLAG_READONLY)
+ bdev->bd_read_only = true;
+#endif
+
/* everything is up and running, commence */
err = xa_insert(&disk->part_tbl, partno, bdev, GFP_KERNEL);
if (err)
@@ -585,6 +597,11 @@ static bool blk_add_partition(struct gen
(state->parts[p].flags & ADDPART_FLAG_RAID))
md_autodetect_dev(part->bd_dev);
+#ifdef CONFIG_FIT_PARTITION
+ if ((state->parts[p].flags & ADDPART_FLAG_ROOTDEV) && ROOT_DEV == 0)
+ ROOT_DEV = part->bd_dev;
+#endif
+
return true;
}
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -433,6 +433,9 @@ int ubiblock_create(struct ubi_volume_in
}
gd->flags |= GENHD_FL_NO_PART;
gd->private_data = dev;
+#ifdef CONFIG_FIT_PARTITION
+ gd->flags |= GENHD_FL_EXT_DEVT;
+#endif
sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id);
set_capacity(gd, disk_capacity);
dev->gd = gd;
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -346,6 +346,9 @@ int add_mtd_blktrans_dev(struct mtd_blkt
gd->first_minor = (new->devnum) << tr->part_bits;
gd->minors = 1 << tr->part_bits;
gd->fops = &mtd_block_ops;
+#ifdef CONFIG_FIT_PARTITION
+ gd->flags |= GENHD_FL_EXT_DEVT;
+#endif
if (tr->part_bits) {
if (new->devnum < 26)
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -716,6 +716,9 @@ int efi_partition(struct parsed_partitio
gpt_entry *ptes = NULL;
u32 i;
unsigned ssz = queue_logical_block_size(state->disk->queue) / 512;
+#ifdef CONFIG_FIT_PARTITION
+ u32 extra_slot = 64;
+#endif
if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
kfree(gpt);
@@ -749,6 +752,11 @@ int efi_partition(struct parsed_partitio
ARRAY_SIZE(ptes[i].partition_name));
utf16_le_to_7bit(ptes[i].partition_name, label_max, info->volname);
state->parts[i + 1].has_info = true;
+#ifdef CONFIG_FIT_PARTITION
+ /* If this is a U-Boot FIT volume it may have subpartitions */
+ if (!efi_guidcmp(ptes[i].partition_type_guid, PARTITION_LINUX_FIT_GUID))
+ (void) parse_fit_partitions(state, start * ssz, size * ssz, &extra_slot, 1);
+#endif
}
kfree(ptes);
kfree(gpt);
--- a/block/partitions/efi.h
+++ b/block/partitions/efi.h
@@ -51,6 +51,9 @@
#define PARTITION_LINUX_LVM_GUID \
EFI_GUID( 0xe6d6d379, 0xf507, 0x44c2, \
0xa2, 0x3c, 0x23, 0x8f, 0x2a, 0x3d, 0xf9, 0x28)
+#define PARTITION_LINUX_FIT_GUID \
+ EFI_GUID( 0xcae9be83, 0xb15f, 0x49cc, \
+ 0x86, 0x3f, 0x08, 0x1b, 0x74, 0x4a, 0x2d, 0x93)
typedef struct _gpt_header {
__le64 signature;
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -564,6 +564,15 @@ static void parse_minix(struct parsed_pa
#endif /* CONFIG_MINIX_SUBPARTITION */
}
+static void parse_fit_mbr(struct parsed_partitions *state,
+ sector_t offset, sector_t size, int origin)
+{
+#ifdef CONFIG_FIT_PARTITION
+ u32 extra_slot = 64;
+ (void) parse_fit_partitions(state, offset, size, &extra_slot, 1);
+#endif /* CONFIG_FIT_PARTITION */
+}
+
static struct {
unsigned char id;
void (*parse)(struct parsed_partitions *, sector_t, sector_t, int);
@@ -575,6 +584,7 @@ static struct {
{UNIXWARE_PARTITION, parse_unixware},
{SOLARIS_X86_PARTITION, parse_solaris_x86},
{NEW_SOLARIS_X86_PARTITION, parse_solaris_x86},
+ {FIT_PARTITION, parse_fit_mbr},
{0, NULL},
};
--- a/include/linux/msdos_partition.h
+++ b/include/linux/msdos_partition.h
@@ -31,6 +31,7 @@ enum msdos_sys_ind {
LINUX_LVM_PARTITION = 0x8e,
LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
+ FIT_PARTITION = 0x2e, /* U-Boot uImage.FIT */
SOLARIS_X86_PARTITION = 0x82, /* also Linux swap partitions */
NEW_SOLARIS_X86_PARTITION = 0xbf,

View File

@ -0,0 +1,39 @@
From: Gabor Juhos <juhosg@openwrt.org>
Subject: kernel/3.1[02]: move MTD root device setup code to mtdcore
The current code only allows to automatically set
root device on MTD partitions. Move the code to MTD
core to allow to use it with all MTD devices.
Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
---
drivers/mtd/mtdcore.c | 10 ++++++++++
1 file changed, 10 insertions(+)
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -27,6 +27,7 @@
#include <linux/reboot.h>
#include <linux/leds.h>
#include <linux/debugfs.h>
+#include <linux/root_dev.h>
#include <linux/nvmem-provider.h>
#include <linux/mtd/mtd.h>
@@ -748,6 +749,16 @@ int add_mtd_device(struct mtd_info *mtd)
of this try_ nonsense, and no bitching about it
either. :) */
__module_get(THIS_MODULE);
+
+ if (!strcmp(mtd->name, "rootfs") &&
+ IS_ENABLED(CONFIG_MTD_ROOTFS_ROOT_DEV) &&
+ ROOT_DEV == 0) {
+ unsigned int index = mtd->index;
+ pr_notice("mtd: device %d (%s) set to be root filesystem\n",
+ mtd->index, mtd->name);
+ ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, index);
+ }
+
return 0;
fail_nvmem_add:

View File

@ -0,0 +1,120 @@
From 6fa9e3678eb002246df1280322b6a024853950a5 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Mon, 11 Oct 2021 00:53:14 +0200
Subject: [PATCH] drivers: mtd: parsers: add nvmem support to cmdlinepart
Assuming cmdlinepart is only one level deep partition scheme and that
static partition are also defined in DTS, we can assign an of_node for
partition declared from bootargs. cmdlinepart have priority than
fiexed-partition parser so in this specific case the parser doesn't
assign an of_node. Fix this by searching a defined of_node using a
similar fixed_partition parser and if a partition is found with the same
label, check that it has the same offset and size and return the DT
of_node to correctly use NVMEM cells.
Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
---
drivers/mtd/parsers/cmdlinepart.c | 71 +++++++++++++++++++++++++++++++
1 file changed, 71 insertions(+)
--- a/drivers/mtd/parsers/cmdlinepart.c
+++ b/drivers/mtd/parsers/cmdlinepart.c
@@ -43,6 +43,7 @@
#include <linux/mtd/partitions.h>
#include <linux/module.h>
#include <linux/err.h>
+#include <linux/of.h>
/* debug macro */
#if 0
@@ -323,6 +324,68 @@ static int mtdpart_setup_real(char *s)
return 0;
}
+static int search_fixed_partition(struct mtd_info *master,
+ struct mtd_partition *target_part,
+ struct mtd_partition *fixed_part)
+{
+ struct device_node *mtd_node;
+ struct device_node *ofpart_node;
+ struct device_node *pp;
+ struct mtd_partition part;
+ const char *partname;
+
+ mtd_node = mtd_get_of_node(master);
+ if (!mtd_node)
+ return -EINVAL;
+
+ ofpart_node = of_get_child_by_name(mtd_node, "partitions");
+
+ for_each_child_of_node(ofpart_node, pp) {
+ const __be32 *reg;
+ int len;
+ int a_cells, s_cells;
+
+ reg = of_get_property(pp, "reg", &len);
+ if (!reg) {
+ pr_debug("%s: ofpart partition %pOF (%pOF) missing reg property.\n",
+ master->name, pp,
+ mtd_node);
+ continue;
+ }
+
+ a_cells = of_n_addr_cells(pp);
+ s_cells = of_n_size_cells(pp);
+ if (len / 4 != a_cells + s_cells) {
+ pr_debug("%s: ofpart partition %pOF (%pOF) error parsing reg property.\n",
+ master->name, pp,
+ mtd_node);
+ continue;
+ }
+
+ part.offset = of_read_number(reg, a_cells);
+ part.size = of_read_number(reg + a_cells, s_cells);
+ part.of_node = pp;
+
+ partname = of_get_property(pp, "label", &len);
+ if (!partname)
+ partname = of_get_property(pp, "name", &len);
+ part.name = partname;
+
+ if (!strncmp(target_part->name, part.name, len)) {
+ if (part.offset != target_part->offset)
+ return -EINVAL;
+
+ if (part.size != target_part->size)
+ return -EINVAL;
+
+ memcpy(fixed_part, &part, sizeof(struct mtd_partition));
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
/*
* Main function to be called from the MTD mapping driver/device to
* obtain the partitioning information. At this point the command line
@@ -338,6 +401,7 @@ static int parse_cmdline_partitions(stru
int i, err;
struct cmdline_mtd_partition *part;
const char *mtd_id = master->name;
+ struct mtd_partition fixed_part;
/* parse command line */
if (!cmdline_parsed) {
@@ -382,6 +446,13 @@ static int parse_cmdline_partitions(stru
sizeof(*part->parts) * (part->num_parts - i));
i--;
}
+
+ err = search_fixed_partition(master, &part->parts[i], &fixed_part);
+ if (!err) {
+ part->parts[i].of_node = fixed_part.of_node;
+ pr_info("Found partition defined in DT for %s. Assigning OF node to support nvmem.",
+ part->parts[i].name);
+ }
}
*pparts = kmemdup(part->parts, sizeof(*part->parts) * part->num_parts,

View File

@ -0,0 +1,23 @@
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -61,6 +61,10 @@ config MTD_NAND_ECC_MEDIATEK
help
This enables support for the hardware ECC engine from Mediatek.
+config MTD_NAND_MTK_BMT
+ bool "Support MediaTek NAND Bad-block Management Table"
+ default n
+
endmenu
endmenu
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -3,6 +3,7 @@
nandcore-objs := core.o bbt.o
obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o
obj-$(CONFIG_MTD_NAND_ECC_MEDIATEK) += ecc-mtk.o
+obj-$(CONFIG_MTD_NAND_MTK_BMT) += mtk_bmt.o mtk_bmt_v2.o mtk_bmt_bbt.o mtk_bmt_nmbm.o
obj-y += onenand/
obj-y += raw/

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,41 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 Jul 2017 17:18:54 +0200
Subject: bridge: only accept EAP locally
When bridging, do not forward EAP frames to other ports, only deliver
them locally, regardless of the state.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
[add disable_eap_hack sysfs attribute]
Signed-off-by: Etienne Champetier <champetier.etienne@gmail.com>
---
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -133,10 +133,14 @@ int br_handle_frame_finish(struct net *n
}
}
+ BR_INPUT_SKB_CB(skb)->brdev = br->dev;
+
+ if (skb->protocol == htons(ETH_P_PAE) && !br->disable_eap_hack)
+ return br_pass_frame_up(skb);
+
if (state == BR_STATE_LEARNING)
goto drop;
- BR_INPUT_SKB_CB(skb)->brdev = br->dev;
BR_INPUT_SKB_CB(skb)->src_port_isolated = !!(p->flags & BR_ISOLATED);
if (IS_ENABLED(CONFIG_INET) &&
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -482,6 +482,8 @@ struct net_bridge {
u16 group_fwd_mask;
u16 group_fwd_mask_required;
+ bool disable_eap_hack;
+
/* STP */
bridge_id designated_root;
bridge_id bridge_id;

View File

@ -0,0 +1,212 @@
From eda40b8c8c82e0f2789d6bc8bf63846dce2e8f32 Mon Sep 17 00:00:00 2001
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Date: Sat, 23 Mar 2019 09:29:49 +0000
Subject: [PATCH] netfilter: connmark: introduce set-dscpmark
set-dscpmark is a method of storing the DSCP of an ip packet into
conntrack mark. In combination with a suitable tc filter action
(act_ctinfo) DSCP values are able to be stored in the mark on egress and
restored on ingress across links that otherwise alter or bleach DSCP.
This is useful for qdiscs such as CAKE which are able to shape according
to policies based on DSCP.
Ingress classification is traditionally a challenging task since
iptables rules haven't yet run and tc filter/eBPF programs are pre-NAT
lookups, hence are unable to see internal IPv4 addresses as used on the
typical home masquerading gateway.
x_tables CONNMARK set-dscpmark target solves the problem of storing the
DSCP to the conntrack mark in a way suitable for the new act_ctinfo tc
action to restore.
The set-dscpmark option accepts 2 parameters, a 32bit 'dscpmask' and a
32bit 'statemask'. The dscp mask must be 6 contiguous bits and
represents the area where the DSCP will be stored in the connmark. The
state mask is a minimum 1 bit length mask that must not overlap with the
dscpmask. It represents a flag which is set when the DSCP has been
stored in the conntrack mark. This is useful to implement a 'one shot'
iptables based classification where the 'complicated' iptables rules are
only run once to classify the connection on initial (egress) packet and
subsequent packets are all marked/restored with the same DSCP. A state
mask of zero disables the setting of a status bit/s.
example syntax with a suitably modified iptables user space application:
iptables -A QOS_MARK_eth0 -t mangle -j CONNMARK --set-dscpmark 0xfc000000/0x01000000
Would store the DSCP in the top 6 bits of the 32bit mark field, and use
the LSB of the top byte as the 'DSCP has been stored' marker.
|----0xFC----conntrack mark----000000---|
| Bits 31-26 | bit 25 | bit24 |~~~ Bit 0|
| DSCP | unused | flag |unused |
|-----------------------0x01---000000---|
^ ^
| |
---| Conditional flag
| set this when dscp
|-ip diffserv-| stored in mark
| 6 bits |
|-------------|
an identically configured tc action to restore looks like:
tc filter show dev eth0 ingress
filter parent ffff: protocol all pref 10 u32 chain 0
filter parent ffff: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1
filter parent ffff: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1: not_in_hw
match 00000000/00000000 at 0
action order 1: ctinfo zone 0 pipe
index 2 ref 1 bind 1 dscp 0xfc000000/0x1000000
action order 2: mirred (Egress Redirect to device ifb4eth0) stolen
index 1 ref 1 bind 1
|----0xFC----conntrack mark----000000---|
| Bits 31-26 | bit 25 | bit24 |~~~ Bit 0|
| DSCP | unused | flag |unused |
|-----------------------0x01---000000---|
| |
| |
---| Conditional flag
v only restore if set
|-ip diffserv-|
| 6 bits |
|-------------|
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
---
include/uapi/linux/netfilter/xt_connmark.h | 10 ++++
net/netfilter/xt_connmark.c | 55 ++++++++++++++++++----
2 files changed, 57 insertions(+), 8 deletions(-)
--- a/include/uapi/linux/netfilter/xt_connmark.h
+++ b/include/uapi/linux/netfilter/xt_connmark.h
@@ -20,6 +20,11 @@ enum {
};
enum {
+ XT_CONNMARK_VALUE = (1 << 0),
+ XT_CONNMARK_DSCP = (1 << 1)
+};
+
+enum {
D_SHIFT_LEFT = 0,
D_SHIFT_RIGHT,
};
@@ -34,6 +39,11 @@ struct xt_connmark_tginfo2 {
__u8 shift_dir, shift_bits, mode;
};
+struct xt_connmark_tginfo3 {
+ __u32 ctmark, ctmask, nfmask;
+ __u8 shift_dir, shift_bits, mode, func;
+};
+
struct xt_connmark_mtinfo1 {
__u32 mark, mask;
__u8 invert;
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -24,12 +24,13 @@ MODULE_ALIAS("ipt_connmark");
MODULE_ALIAS("ip6t_connmark");
static unsigned int
-connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
+connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo3 *info)
{
enum ip_conntrack_info ctinfo;
u_int32_t new_targetmark;
struct nf_conn *ct;
u_int32_t newmark;
+ u_int8_t dscp;
ct = nf_ct_get(skb, &ctinfo);
if (ct == NULL)
@@ -37,12 +38,24 @@ connmark_tg_shift(struct sk_buff *skb, c
switch (info->mode) {
case XT_CONNMARK_SET:
- newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
- if (info->shift_dir == D_SHIFT_RIGHT)
- newmark >>= info->shift_bits;
- else
- newmark <<= info->shift_bits;
+ newmark = ct->mark;
+ if (info->func & XT_CONNMARK_VALUE) {
+ newmark = (newmark & ~info->ctmask) ^ info->ctmark;
+ if (info->shift_dir == D_SHIFT_RIGHT)
+ newmark >>= info->shift_bits;
+ else
+ newmark <<= info->shift_bits;
+ } else if (info->func & XT_CONNMARK_DSCP) {
+ if (skb->protocol == htons(ETH_P_IP))
+ dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+ else /* protocol doesn't have diffserv */
+ break;
+ newmark = (newmark & ~info->ctmark) |
+ (info->ctmask | (dscp << info->shift_bits));
+ }
if (ct->mark != newmark) {
ct->mark = newmark;
nf_conntrack_event_cache(IPCT_MARK, ct);
@@ -81,20 +94,36 @@ static unsigned int
connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_connmark_tginfo1 *info = par->targinfo;
- const struct xt_connmark_tginfo2 info2 = {
+ const struct xt_connmark_tginfo3 info3 = {
.ctmark = info->ctmark,
.ctmask = info->ctmask,
.nfmask = info->nfmask,
.mode = info->mode,
+ .func = XT_CONNMARK_VALUE
};
- return connmark_tg_shift(skb, &info2);
+ return connmark_tg_shift(skb, &info3);
}
static unsigned int
connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_connmark_tginfo2 *info = par->targinfo;
+ const struct xt_connmark_tginfo3 info3 = {
+ .ctmark = info->ctmark,
+ .ctmask = info->ctmask,
+ .nfmask = info->nfmask,
+ .mode = info->mode,
+ .func = XT_CONNMARK_VALUE
+ };
+
+ return connmark_tg_shift(skb, &info3);
+}
+
+static unsigned int
+connmark_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_connmark_tginfo3 *info = par->targinfo;
return connmark_tg_shift(skb, info);
}
@@ -165,6 +194,16 @@ static struct xt_target connmark_tg_reg[
.targetsize = sizeof(struct xt_connmark_tginfo2),
.destroy = connmark_tg_destroy,
.me = THIS_MODULE,
+ },
+ {
+ .name = "CONNMARK",
+ .revision = 3,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connmark_tg_check,
+ .target = connmark_tg_v3,
+ .targetsize = sizeof(struct xt_connmark_tginfo3),
+ .destroy = connmark_tg_destroy,
+ .me = THIS_MODULE,
}
};

View File

@ -0,0 +1,776 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 20 Feb 2018 15:56:02 +0100
Subject: [PATCH] netfilter: add xt_FLOWOFFLOAD target
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
create mode 100644 net/netfilter/xt_OFFLOAD.c
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -712,8 +712,6 @@ config NFT_REJECT_NETDEV
endif # NF_TABLES_NETDEV
-endif # NF_TABLES
-
config NF_FLOW_TABLE_INET
tristate "Netfilter flow table mixed IPv4/IPv6 module"
depends on NF_FLOW_TABLE
@@ -722,11 +720,12 @@ config NF_FLOW_TABLE_INET
To compile it as a module, choose M here.
+endif # NF_TABLES
+
config NF_FLOW_TABLE
tristate "Netfilter flow table module"
depends on NETFILTER_INGRESS
depends on NF_CONNTRACK
- depends on NF_TABLES
help
This option adds the flow table core infrastructure.
@@ -1023,6 +1022,15 @@ config NETFILTER_XT_TARGET_NOTRACK
depends on NETFILTER_ADVANCED
select NETFILTER_XT_TARGET_CT
+config NETFILTER_XT_TARGET_FLOWOFFLOAD
+ tristate '"FLOWOFFLOAD" target support'
+ depends on NF_FLOW_TABLE
+ depends on NETFILTER_INGRESS
+ help
+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
+ module to speed up processing of packets by bypassing the usual
+ netfilter chains
+
config NETFILTER_XT_TARGET_RATEEST
tristate '"RATEEST" target support'
depends on NETFILTER_ADVANCED
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -148,6 +148,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIF
obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
--- /dev/null
+++ b/net/netfilter/xt_FLOWOFFLOAD.c
@@ -0,0 +1,694 @@
+/*
+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_flow_table.h>
+
+struct xt_flowoffload_hook {
+ struct hlist_node list;
+ struct nf_hook_ops ops;
+ struct net *net;
+ bool registered;
+ bool used;
+};
+
+struct xt_flowoffload_table {
+ struct nf_flowtable ft;
+ struct hlist_head hooks;
+ struct delayed_work work;
+};
+
+struct nf_forward_info {
+ const struct net_device *indev;
+ const struct net_device *outdev;
+ const struct net_device *hw_outdev;
+ struct id {
+ __u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
+ u8 num_encaps;
+ u8 ingress_vlans;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ enum flow_offload_xmit_type xmit_type;
+};
+
+static DEFINE_SPINLOCK(hooks_lock);
+
+struct xt_flowoffload_table flowtable[2];
+
+static unsigned int
+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct vlan_ethhdr *veth;
+ __be16 proto;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ proto = veth->h_vlan_encapsulated_proto;
+ break;
+ case htons(ETH_P_PPP_SES):
+ proto = nf_flow_pppoe_proto(skb);
+ break;
+ default:
+ proto = skb->protocol;
+ break;
+ }
+
+ switch (proto) {
+ case htons(ETH_P_IP):
+ return nf_flow_offload_ip_hook(priv, skb, state);
+ case htons(ETH_P_IPV6):
+ return nf_flow_offload_ipv6_hook(priv, skb, state);
+ }
+
+ return NF_ACCEPT;
+}
+
+static int
+xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
+ struct net_device *dev)
+{
+ struct xt_flowoffload_hook *hook;
+ struct nf_hook_ops *ops;
+
+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
+ if (!hook)
+ return -ENOMEM;
+
+ ops = &hook->ops;
+ ops->pf = NFPROTO_NETDEV;
+ ops->hooknum = NF_NETDEV_INGRESS;
+ ops->priority = 10;
+ ops->priv = &table->ft;
+ ops->hook = xt_flowoffload_net_hook;
+ ops->dev = dev;
+
+ hlist_add_head(&hook->list, &table->hooks);
+ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
+
+ return 0;
+}
+
+static struct xt_flowoffload_hook *
+flow_offload_lookup_hook(struct xt_flowoffload_table *table,
+ struct net_device *dev)
+{
+ struct xt_flowoffload_hook *hook;
+
+ hlist_for_each_entry(hook, &table->hooks, list) {
+ if (hook->ops.dev == dev)
+ return hook;
+ }
+
+ return NULL;
+}
+
+static void
+xt_flowoffload_check_device(struct xt_flowoffload_table *table,
+ struct net_device *dev)
+{
+ struct xt_flowoffload_hook *hook;
+
+ if (!dev)
+ return;
+
+ spin_lock_bh(&hooks_lock);
+ hook = flow_offload_lookup_hook(table, dev);
+ if (hook)
+ hook->used = true;
+ else
+ xt_flowoffload_create_hook(table, dev);
+ spin_unlock_bh(&hooks_lock);
+}
+
+static void
+xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
+{
+ struct xt_flowoffload_hook *hook;
+
+restart:
+ hlist_for_each_entry(hook, &table->hooks, list) {
+ if (hook->registered)
+ continue;
+
+ hook->registered = true;
+ hook->net = dev_net(hook->ops.dev);
+ spin_unlock_bh(&hooks_lock);
+ nf_register_net_hook(hook->net, &hook->ops);
+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
+ table->ft.type->setup(&table->ft, hook->ops.dev,
+ FLOW_BLOCK_BIND);
+ spin_lock_bh(&hooks_lock);
+ goto restart;
+ }
+
+}
+
+static bool
+xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
+{
+ struct xt_flowoffload_hook *hook;
+ bool active = false;
+
+restart:
+ spin_lock_bh(&hooks_lock);
+ hlist_for_each_entry(hook, &table->hooks, list) {
+ if (hook->used || !hook->registered) {
+ active = true;
+ continue;
+ }
+
+ hlist_del(&hook->list);
+ spin_unlock_bh(&hooks_lock);
+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
+ table->ft.type->setup(&table->ft, hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
+ nf_unregister_net_hook(hook->net, &hook->ops);
+ kfree(hook);
+ goto restart;
+ }
+ spin_unlock_bh(&hooks_lock);
+
+ return active;
+}
+
+static void
+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
+{
+ struct xt_flowoffload_table *table = data;
+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
+ struct xt_flowoffload_hook *hook;
+
+ spin_lock_bh(&hooks_lock);
+ hlist_for_each_entry(hook, &table->hooks, list) {
+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
+ hook->ops.dev->ifindex != tuple1->iifidx)
+ continue;
+
+ hook->used = true;
+ }
+ spin_unlock_bh(&hooks_lock);
+}
+
+static void
+xt_flowoffload_hook_work(struct work_struct *work)
+{
+ struct xt_flowoffload_table *table;
+ struct xt_flowoffload_hook *hook;
+ int err;
+
+ table = container_of(work, struct xt_flowoffload_table, work.work);
+
+ spin_lock_bh(&hooks_lock);
+ xt_flowoffload_register_hooks(table);
+ hlist_for_each_entry(hook, &table->hooks, list)
+ hook->used = false;
+ spin_unlock_bh(&hooks_lock);
+
+
+
+ if (err && err != -EAGAIN)
+ goto out;
+
+ if (!xt_flowoffload_cleanup_hooks(table))
+ return;
+
+out:
+ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
+}
+
+static bool
+xt_flowoffload_skip(struct sk_buff *skb, int family)
+{
+ if (skb_sec_path(skb))
+ return true;
+
+ if (family == NFPROTO_IPV4) {
+ const struct ip_options *opt = &(IPCB(skb)->opt);
+
+ if (unlikely(opt->optlen))
+ return true;
+ }
+
+ return false;
+}
+
+static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
+{
+ if (dst_xfrm(dst))
+ return FLOW_OFFLOAD_XMIT_XFRM;
+
+ return FLOW_OFFLOAD_XMIT_NEIGH;
+}
+
+static void nf_default_forward_path(struct nf_flow_route *route,
+ struct dst_entry *dst_cache,
+ enum ip_conntrack_dir dir,
+ struct net_device **dev)
+{
+ dev[!dir] = dst_cache->dev;
+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
+ route->tuple[dir].dst = dst_cache;
+ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
+}
+
+static bool nf_is_valid_ether_device(const struct net_device *dev)
+{
+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+ return false;
+
+ return true;
+}
+
+static void nf_dev_path_info(const struct net_device_path_stack *stack,
+ struct nf_forward_info *info,
+ unsigned char *ha)
+{
+ const struct net_device_path *path;
+ int i;
+
+ memcpy(info->h_dest, ha, ETH_ALEN);
+
+ for (i = 0; i < stack->num_paths; i++) {
+ path = &stack->path[i];
+ switch (path->type) {
+ case DEV_PATH_ETHERNET:
+ case DEV_PATH_DSA:
+ case DEV_PATH_VLAN:
+ case DEV_PATH_PPPOE:
+ info->indev = path->dev;
+ if (is_zero_ether_addr(info->h_source))
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+ if (path->type == DEV_PATH_ETHERNET)
+ break;
+ if (path->type == DEV_PATH_DSA) {
+ i = stack->num_paths;
+ break;
+ }
+
+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
+ info->indev = NULL;
+ break;
+ }
+ if (!info->outdev)
+ info->outdev = path->dev;
+ info->encap[info->num_encaps].id = path->encap.id;
+ info->encap[info->num_encaps].proto = path->encap.proto;
+ info->num_encaps++;
+ if (path->type == DEV_PATH_PPPOE)
+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
+ break;
+ case DEV_PATH_BRIDGE:
+ if (is_zero_ether_addr(info->h_source))
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+ switch (path->bridge.vlan_mode) {
+ case DEV_PATH_BR_VLAN_UNTAG_HW:
+ info->ingress_vlans |= BIT(info->num_encaps - 1);
+ break;
+ case DEV_PATH_BR_VLAN_TAG:
+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
+ info->num_encaps++;
+ break;
+ case DEV_PATH_BR_VLAN_UNTAG:
+ info->num_encaps--;
+ break;
+ case DEV_PATH_BR_VLAN_KEEP:
+ break;
+ }
+ break;
+ default:
+ info->indev = NULL;
+ break;
+ }
+ }
+ if (!info->outdev)
+ info->outdev = info->indev;
+
+ info->hw_outdev = info->indev;
+
+ if (nf_is_valid_ether_device(info->indev))
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+}
+
+static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
+ const struct dst_entry *dst_cache,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir, u8 *ha,
+ struct net_device_path_stack *stack)
+{
+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
+ struct net_device *dev = dst_cache->dev;
+ struct neighbour *n;
+ u8 nud_state;
+
+ if (!nf_is_valid_ether_device(dev))
+ goto out;
+
+ n = dst_neigh_lookup(dst_cache, daddr);
+ if (!n)
+ return -1;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(ha, n->ha);
+ read_unlock_bh(&n->lock);
+ neigh_release(n);
+
+ if (!(nud_state & NUD_VALID))
+ return -1;
+
+out:
+ return dev_fill_forward_path(dev, ha, stack);
+}
+
+static void nf_dev_forward_path(struct nf_flow_route *route,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ struct net_device **devs)
+{
+ const struct dst_entry *dst = route->tuple[dir].dst;
+ struct net_device_path_stack stack;
+ struct nf_forward_info info = {};
+ unsigned char ha[ETH_ALEN];
+ int i;
+
+ if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
+ nf_dev_path_info(&stack, &info, ha);
+
+ devs[!dir] = (struct net_device *)info.indev;
+ if (!info.indev)
+ return;
+
+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
+ for (i = 0; i < info.num_encaps; i++) {
+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
+ }
+ route->tuple[!dir].in.num_encaps = info.num_encaps;
+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
+
+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
+ route->tuple[dir].xmit_type = info.xmit_type;
+ }
+}
+
+static int
+xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct,
+ const struct xt_action_param *par,
+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
+ struct net_device **devs)
+{
+ struct dst_entry *this_dst = skb_dst(skb);
+ struct dst_entry *other_dst = NULL;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+ switch (xt_family(par)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+ fl.u.ip4.flowi4_oif = xt_in(par)->ifindex;
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
+ fl.u.ip6.flowi6_oif = xt_in(par)->ifindex;
+ break;
+ }
+
+ nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par));
+ if (!other_dst)
+ return -ENOENT;
+
+ nf_default_forward_path(route, this_dst, dir, devs);
+ nf_default_forward_path(route, other_dst, !dir, devs);
+
+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
+ nf_dev_forward_path(route, ct, dir, devs);
+ nf_dev_forward_path(route, ct, !dir, devs);
+ }
+
+ return 0;
+}
+
+static unsigned int
+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct xt_flowoffload_table *table;
+ const struct xt_flowoffload_target_info *info = par->targinfo;
+ struct tcphdr _tcph, *tcph = NULL;
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ struct nf_flow_route route = {};
+ struct flow_offload *flow = NULL;
+ struct net_device *devs[2] = {};
+ struct nf_conn *ct;
+ struct net *net;
+
+ if (xt_flowoffload_skip(skb, xt_family(par)))
+ return XT_CONTINUE;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return XT_CONTINUE;
+
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
+ case IPPROTO_TCP:
+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+ return XT_CONTINUE;
+
+ tcph = skb_header_pointer(skb, par->thoff,
+ sizeof(_tcph), &_tcph);
+ if (unlikely(!tcph || tcph->fin || tcph->rst))
+ return XT_CONTINUE;
+ break;
+ case IPPROTO_UDP:
+ break;
+ default:
+ return XT_CONTINUE;
+ }
+
+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
+ ct->status & (IPS_SEQ_ADJUST | IPS_NAT_CLASH))
+ return XT_CONTINUE;
+
+ if (!nf_ct_is_confirmed(ct))
+ return XT_CONTINUE;
+
+ devs[dir] = xt_out(par);
+ devs[!dir] = xt_in(par);
+
+ if (!devs[dir] || !devs[!dir])
+ return XT_CONTINUE;
+
+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
+ return XT_CONTINUE;
+
+ dir = CTINFO2DIR(ctinfo);
+
+ if (xt_flowoffload_route(skb, ct, par, &route, dir, devs) < 0)
+ goto err_flow_route;
+
+ flow = flow_offload_alloc(ct);
+ if (!flow)
+ goto err_flow_alloc;
+
+ if (flow_offload_route_init(flow, &route) < 0)
+ goto err_flow_add;
+
+ if (tcph) {
+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ }
+
+ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
+
+ net = read_pnet(&table->ft.net);
+ if (!net)
+ write_pnet(&table->ft.net, xt_net(par));
+
+ if (flow_offload_add(&table->ft, flow) < 0)
+ goto err_flow_add;
+
+ xt_flowoffload_check_device(table, devs[0]);
+ xt_flowoffload_check_device(table, devs[1]);
+
+ dst_release(route.tuple[!dir].dst);
+
+ return XT_CONTINUE;
+
+err_flow_add:
+ flow_offload_free(flow);
+err_flow_alloc:
+ dst_release(route.tuple[!dir].dst);
+err_flow_route:
+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
+
+ return XT_CONTINUE;
+}
+
+static int flowoffload_chk(const struct xt_tgchk_param *par)
+{
+ struct xt_flowoffload_target_info *info = par->targinfo;
+
+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct xt_target offload_tg_reg __read_mostly = {
+ .family = NFPROTO_UNSPEC,
+ .name = "FLOWOFFLOAD",
+ .revision = 0,
+ .targetsize = sizeof(struct xt_flowoffload_target_info),
+ .usersize = sizeof(struct xt_flowoffload_target_info),
+ .checkentry = flowoffload_chk,
+ .target = flowoffload_tg,
+ .me = THIS_MODULE,
+};
+
+static int flow_offload_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct xt_flowoffload_hook *hook0, *hook1;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (event != NETDEV_UNREGISTER)
+ return NOTIFY_DONE;
+
+ spin_lock_bh(&hooks_lock);
+ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
+ if (hook0)
+ hlist_del(&hook0->list);
+
+ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
+ if (hook1)
+ hlist_del(&hook1->list);
+ spin_unlock_bh(&hooks_lock);
+
+ if (hook0) {
+ nf_unregister_net_hook(hook0->net, &hook0->ops);
+ kfree(hook0);
+ }
+
+ if (hook1) {
+ nf_unregister_net_hook(hook1->net, &hook1->ops);
+ kfree(hook1);
+ }
+
+ nf_flow_table_cleanup(dev);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block flow_offload_netdev_notifier = {
+ .notifier_call = flow_offload_netdev_event,
+};
+
+static int nf_flow_rule_route_inet(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
+ int err;
+
+ switch (flow_tuple->l3proto) {
+ case NFPROTO_IPV4:
+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
+ break;
+ case NFPROTO_IPV6:
+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
+ break;
+ default:
+ err = -1;
+ break;
+ }
+
+ return err;
+}
+
+static struct nf_flowtable_type flowtable_inet = {
+ .family = NFPROTO_INET,
+ .init = nf_flow_table_init,
+ .setup = nf_flow_table_offload_setup,
+ .action = nf_flow_rule_route_inet,
+ .free = nf_flow_table_free,
+ .hook = xt_flowoffload_net_hook,
+ .owner = THIS_MODULE,
+};
+
+static int init_flowtable(struct xt_flowoffload_table *tbl)
+{
+ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
+ tbl->ft.type = &flowtable_inet;
+
+ return nf_flow_table_init(&tbl->ft);
+}
+
+static int __init xt_flowoffload_tg_init(void)
+{
+ int ret;
+
+ register_netdevice_notifier(&flow_offload_netdev_notifier);
+
+ ret = init_flowtable(&flowtable[0]);
+ if (ret)
+ return ret;
+
+ ret = init_flowtable(&flowtable[1]);
+ if (ret)
+ goto cleanup;
+
+ flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
+
+ ret = xt_register_target(&offload_tg_reg);
+ if (ret)
+ goto cleanup2;
+
+ return 0;
+
+cleanup2:
+ nf_flow_table_free(&flowtable[1].ft);
+cleanup:
+ nf_flow_table_free(&flowtable[0].ft);
+ return ret;
+}
+
+static void __exit xt_flowoffload_tg_exit(void)
+{
+ xt_unregister_target(&offload_tg_reg);
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+ nf_flow_table_free(&flowtable[0].ft);
+ nf_flow_table_free(&flowtable[1].ft);
+}
+
+MODULE_LICENSE("GPL");
+module_init(xt_flowoffload_tg_init);
+module_exit(xt_flowoffload_tg_exit);
--- /dev/null
+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _XT_FLOWOFFLOAD_H
+#define _XT_FLOWOFFLOAD_H
+
+#include <linux/types.h>
+
+enum {
+ XT_FLOWOFFLOAD_HW = 1 << 0,
+
+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
+};
+
+struct xt_flowoffload_target_info {
+ __u32 flags;
+};
+
+#endif /* _XT_FLOWOFFLOAD_H */

View File

@ -0,0 +1,24 @@
From 6d3bc769657b0ee7c7506dad9911111c4226a7ea Mon Sep 17 00:00:00 2001
From: Imre Kaloz <kaloz@openwrt.org>
Date: Fri, 7 Jul 2017 17:21:05 +0200
Subject: mac80211: increase wireless mesh header size
lede-commit 3d4466cfd8f75f717efdb1f96fdde3c70d865fc1
Signed-off-by: Imre Kaloz <kaloz@openwrt.org>
---
include/linux/netdevice.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -149,8 +149,8 @@ static inline bool dev_xmit_complete(int
#if defined(CONFIG_HYPERV_NET)
# define LL_MAX_HEADER 128
-#elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25)
-# if defined(CONFIG_MAC80211_MESH)
+#elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) || 1
+# if defined(CONFIG_MAC80211_MESH) || 1
# define LL_MAX_HEADER 128
# else
# define LL_MAX_HEADER 96

View File

@ -0,0 +1,27 @@
From a6ccb238939b25851474a279b20367fd24a0e816 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 Jul 2017 17:21:53 +0200
Subject: hack: net: fq_codel: tune defaults for small devices
Assume that x86_64 devices always have a big memory and do not need this
optimization compared to devices with only 32 MB or 64 MB RAM.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
net/sched/sch_fq_codel.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -476,7 +476,11 @@ static int fq_codel_init(struct Qdisc *s
sch->limit = 10*1024;
q->flows_cnt = 1024;
+#ifdef CONFIG_X86_64
q->memory_limit = 32 << 20; /* 32 MBytes */
+#else
+ q->memory_limit = 4 << 20; /* 4 MBytes */
+#endif
q->drop_batch_size = 64;
q->quantum = psched_mtu(qdisc_dev(sch));
INIT_LIST_HEAD(&q->new_flows);

View File

@ -0,0 +1,100 @@
From 1d418f7e88035ed7a94073f6354246c66e9193e9 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 Jul 2017 17:22:58 +0200
Subject: fq_codel: switch default qdisc from pfifo_fast to fq_codel and remove pfifo_fast
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
include/net/sch_generic.h | 3 ++-
net/sched/Kconfig | 3 ++-
net/sched/sch_api.c | 2 +-
net/sched/sch_fq_codel.c | 3 ++-
net/sched/sch_generic.c | 4 ++--
5 files changed, 9 insertions(+), 6 deletions(-)
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -585,12 +585,13 @@ extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_fast_ops;
extern struct Qdisc_ops mq_qdisc_ops;
extern struct Qdisc_ops noqueue_qdisc_ops;
+extern struct Qdisc_ops fq_codel_qdisc_ops;
extern const struct Qdisc_ops *default_qdisc_ops;
static inline const struct Qdisc_ops *
get_default_qdisc_ops(const struct net_device *dev, int ntx)
{
return ntx < dev->real_num_tx_queues ?
- default_qdisc_ops : &pfifo_fast_ops;
+ default_qdisc_ops : &fq_codel_qdisc_ops;
}
struct Qdisc_class_common {
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -4,8 +4,9 @@
#
menuconfig NET_SCHED
- bool "QoS and/or fair queueing"
+ def_bool y
select NET_SCH_FIFO
+ select NET_SCH_FQ_CODEL
help
When the kernel has several packets to send out over a network
device, it has to decide which ones to send first, which ones to
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -2276,7 +2276,7 @@ static int __init pktsched_init(void)
return err;
}
- register_qdisc(&pfifo_fast_ops);
+ register_qdisc(&fq_codel_qdisc_ops);
register_qdisc(&pfifo_qdisc_ops);
register_qdisc(&bfifo_qdisc_ops);
register_qdisc(&pfifo_head_drop_qdisc_ops);
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -721,7 +721,7 @@ static const struct Qdisc_class_ops fq_c
.walk = fq_codel_walk,
};
-static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
+struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
.cl_ops = &fq_codel_class_ops,
.id = "fq_codel",
.priv_size = sizeof(struct fq_codel_sched_data),
@@ -736,6 +736,7 @@ static struct Qdisc_ops fq_codel_qdisc_o
.dump_stats = fq_codel_dump_stats,
.owner = THIS_MODULE,
};
+EXPORT_SYMBOL(fq_codel_qdisc_ops);
static int __init fq_codel_module_init(void)
{
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -32,7 +32,7 @@
#include <net/xfrm.h>
/* Qdisc to use by default */
-const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
+const struct Qdisc_ops *default_qdisc_ops = &fq_codel_qdisc_ops;
EXPORT_SYMBOL(default_qdisc_ops);
static void qdisc_maybe_clear_missed(struct Qdisc *q,
@@ -1142,12 +1142,12 @@ static void attach_one_default_qdisc(str
void *_unused)
{
struct Qdisc *qdisc;
- const struct Qdisc_ops *ops = default_qdisc_ops;
+ const struct Qdisc_ops *ops = &fq_codel_qdisc_ops;
if (dev->priv_flags & IFF_NO_QUEUE)
ops = &noqueue_qdisc_ops;
else if(dev->type == ARPHRD_CAN)
- ops = &pfifo_fast_ops;
+ ops = &fq_codel_qdisc_ops;
qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
if (!qdisc)

View File

@ -0,0 +1,129 @@
From 36e516290611e613aa92996cb4339561452695b4 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 Jul 2017 17:24:23 +0200
Subject: net: swconfig: adds openwrt switch layer
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
drivers/net/phy/Kconfig | 83 +++++++++++++++++++++++++++++++++++++++++++++++
drivers/net/phy/Makefile | 15 +++++++++
include/uapi/linux/Kbuild | 1 +
3 files changed, 99 insertions(+)
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -61,6 +61,80 @@ config SFP
depends on HWMON || HWMON=n
select MDIO_I2C
+comment "Switch configuration API + drivers"
+
+config SWCONFIG
+ tristate "Switch configuration API"
+ help
+ Switch configuration API using netlink. This allows
+ you to configure the VLAN features of certain switches.
+
+config SWCONFIG_LEDS
+ bool "Switch LED trigger support"
+ depends on (SWCONFIG && LEDS_TRIGGERS)
+
+config ADM6996_PHY
+ tristate "Driver for ADM6996 switches"
+ select SWCONFIG
+ help
+ Currently supports the ADM6996FC and ADM6996M switches.
+ Support for FC is very limited.
+
+config AR8216_PHY
+ tristate "Driver for Atheros AR8216 switches"
+ select SWCONFIG
+
+config AR8216_PHY_LEDS
+ bool "Atheros AR8216 switch LED support"
+ depends on (AR8216_PHY && LEDS_CLASS)
+
+source "drivers/net/phy/b53/Kconfig"
+
+config IP17XX_PHY
+ tristate "Driver for IC+ IP17xx switches"
+ select SWCONFIG
+
+config PSB6970_PHY
+ tristate "Lantiq XWAY Tantos (PSB6970) Ethernet switch"
+ select SWCONFIG
+ select ETHERNET_PACKET_MANGLE
+
+config RTL8306_PHY
+ tristate "Driver for Realtek RTL8306S switches"
+ select SWCONFIG
+
+config RTL8366_SMI
+ tristate "Driver for the RTL8366 SMI interface"
+ depends on GPIOLIB
+ help
+ This module implements the SMI interface protocol which is used
+ by some RTL8366 ethernet switch devices via the generic GPIO API.
+
+if RTL8366_SMI
+
+config RTL8366_SMI_DEBUG_FS
+ bool "RTL8366 SMI interface debugfs support"
+ depends on DEBUG_FS
+ default n
+
+config RTL8366S_PHY
+ tristate "Driver for the Realtek RTL8366S switch"
+ select SWCONFIG
+
+config RTL8366RB_PHY
+ tristate "Driver for the Realtek RTL8366RB switch"
+ select SWCONFIG
+
+config RTL8367_PHY
+ tristate "Driver for the Realtek RTL8367R/M switches"
+ select SWCONFIG
+
+config RTL8367B_PHY
+ tristate "Driver fot the Realtek RTL8367R-VB switch"
+ select SWCONFIG
+
+endif # RTL8366_SMI
+
comment "MII PHY device drivers"
config AMD_PHY
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -24,6 +24,19 @@ libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_
obj-$(CONFIG_PHYLINK) += phylink.o
obj-$(CONFIG_PHYLIB) += libphy.o
+obj-$(CONFIG_SWCONFIG) += swconfig.o
+obj-$(CONFIG_ADM6996_PHY) += adm6996.o
+obj-$(CONFIG_AR8216_PHY) += ar8216.o ar8327.o
+obj-$(CONFIG_SWCONFIG_B53) += b53/
+obj-$(CONFIG_IP17XX_PHY) += ip17xx.o
+obj-$(CONFIG_PSB6970_PHY) += psb6970.o
+obj-$(CONFIG_RTL8306_PHY) += rtl8306.o
+obj-$(CONFIG_RTL8366_SMI) += rtl8366_smi.o
+obj-$(CONFIG_RTL8366S_PHY) += rtl8366s.o
+obj-$(CONFIG_RTL8366RB_PHY) += rtl8366rb.o
+obj-$(CONFIG_RTL8367_PHY) += rtl8367.o
+obj-$(CONFIG_RTL8367B_PHY) += rtl8367b.o
+
obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += mii_timestamper.o
obj-$(CONFIG_SFP) += sfp.o
--- a/include/linux/platform_data/b53.h
+++ b/include/linux/platform_data/b53.h
@@ -29,6 +29,9 @@ struct b53_platform_data {
u32 chip_id;
u16 enabled_ports;
+ /* allow to specify an ethX alias */
+ const char *alias;
+
/* only used by MMAP'd driver */
unsigned big_endian:1;
void __iomem *regs;

View File

@ -0,0 +1,74 @@
From 82985725e071f2a5735052f18e109a32aeac3a0b Mon Sep 17 00:00:00 2001
From: David Bauer <mail@david-bauer.net>
Date: Sun, 26 Jul 2020 02:38:31 +0200
Subject: [PATCH] net: usb: r8152: add LED configuration from OF
This adds the ability to configure the LED configuration register using
OF. This way, the correct value for board specific LED configuration can
be determined.
Signed-off-by: David Bauer <mail@david-bauer.net>
---
drivers/net/usb/r8152.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -11,6 +11,7 @@
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/usb.h>
+#include <linux/of.h>
#include <linux/crc32.h>
#include <linux/if_vlan.h>
#include <linux/uaccess.h>
@@ -6866,6 +6867,22 @@ static void rtl_tally_reset(struct r8152
ocp_write_word(tp, MCU_TYPE_PLA, PLA_RSTTALLY, ocp_data);
}
+static int r8152_led_configuration(struct r8152 *tp)
+{
+ u32 led_data;
+ int ret;
+
+ ret = of_property_read_u32(tp->udev->dev.of_node, "realtek,led-data",
+ &led_data);
+
+ if (ret)
+ return ret;
+
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_LEDSEL, led_data);
+
+ return 0;
+}
+
static void r8152b_init(struct r8152 *tp)
{
u32 ocp_data;
@@ -6907,6 +6924,8 @@ static void r8152b_init(struct r8152 *tp
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
+
+ r8152_led_configuration(tp);
}
static void r8153_init(struct r8152 *tp)
@@ -7047,6 +7066,8 @@ static void r8153_init(struct r8152 *tp)
tp->coalesce = COALESCE_SLOW;
break;
}
+
+ r8152_led_configuration(tp);
}
static void r8153b_init(struct r8152 *tp)
@@ -7129,6 +7150,8 @@ static void r8153b_init(struct r8152 *tp
rtl_tally_reset(tp);
tp->coalesce = 15000; /* 15 us */
+
+ r8152_led_configuration(tp);
}
static void r8153c_init(struct r8152 *tp)

View File

@ -0,0 +1,54 @@
From 3ee05f4aa64fc86af3be5bc176ba5808de9260a7 Mon Sep 17 00:00:00 2001
From: David Bauer <mail@david-bauer.net>
Date: Sun, 26 Jul 2020 15:30:33 +0200
Subject: [PATCH] dt-bindings: net: add RTL8152 binding documentation
Add binding documentation for the Realtek RTL8152 / RTL8153 USB ethernet
adapters.
Signed-off-by: David Bauer <mail@david-bauer.net>
---
.../bindings/net/realtek,rtl8152.yaml | 36 +++++++++++++++++++
1 file changed, 36 insertions(+)
create mode 100644 Documentation/devicetree/bindings/net/realtek,rtl8152.yaml
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/realtek,rtl8152.yaml
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/realtek,rtl8152.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Realtek RTL8152/RTL8153 series USB ethernet
+
+maintainers:
+ - David Bauer <mail@david-bauer.net>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - realtek,rtl8152
+ - realtek,rtl8153
+
+ reg:
+ description: The device number on the USB bus
+
+ realtek,led-data:
+ description: Value to be written to the LED configuration register.
+
+required:
+ - compatible
+ - reg
+
+examples:
+ - |
+ usb-eth@2 {
+ compatible = "realtek,rtl8153";
+ reg = <2>;
+ realtek,led-data = <0x87>;
+ };
\ No newline at end of file

View File

@ -0,0 +1,98 @@
From 3cb240533ab787899dc7f17aa7d6c5b4810e2e58 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Fri, 7 Jul 2017 17:26:01 +0200
Subject: bcm53xx: bgmac: use srab switch driver
use the srab switch driver on these SoCs.
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
---
drivers/net/ethernet/broadcom/bgmac-bcma.c | 1 +
drivers/net/ethernet/broadcom/bgmac.c | 24 ++++++++++++++++++++++++
drivers/net/ethernet/broadcom/bgmac.h | 4 ++++
3 files changed, 29 insertions(+)
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -280,6 +280,7 @@ static int bgmac_probe(struct bcma_devic
bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
bgmac->feature_flags |= BGMAC_FEAT_NO_RESET;
bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
+ bgmac->feature_flags |= BGMAC_FEAT_SRAB;
break;
default:
bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -12,6 +12,7 @@
#include <linux/bcma/bcma.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
+#include <linux/platform_data/b53.h>
#include <linux/bcm47xx_nvram.h>
#include <linux/phy.h>
#include <linux/phy_fixed.h>
@@ -1408,6 +1409,17 @@ static const struct ethtool_ops bgmac_et
.set_link_ksettings = phy_ethtool_set_link_ksettings,
};
+static struct b53_platform_data bgmac_b53_pdata = {
+};
+
+static struct platform_device bgmac_b53_dev = {
+ .name = "b53-srab-switch",
+ .id = -1,
+ .dev = {
+ .platform_data = &bgmac_b53_pdata,
+ },
+};
+
/**************************************************
* MII
**************************************************/
@@ -1542,6 +1554,14 @@ int bgmac_enet_probe(struct bgmac *bgmac
/* Omit FCS from max MTU size */
net_dev->max_mtu = BGMAC_RX_MAX_FRAME_SIZE - ETH_FCS_LEN;
+ if ((bgmac->feature_flags & BGMAC_FEAT_SRAB) && !bgmac_b53_pdata.regs) {
+ bgmac_b53_pdata.regs = ioremap_nocache(0x18007000, 0x1000);
+
+ err = platform_device_register(&bgmac_b53_dev);
+ if (!err)
+ bgmac->b53_device = &bgmac_b53_dev;
+ }
+
err = register_netdev(bgmac->net_dev);
if (err) {
dev_err(bgmac->dev, "Cannot register net device\n");
@@ -1564,6 +1584,10 @@ EXPORT_SYMBOL_GPL(bgmac_enet_probe);
void bgmac_enet_remove(struct bgmac *bgmac)
{
+ if (bgmac->b53_device)
+ platform_device_unregister(&bgmac_b53_dev);
+ bgmac->b53_device = NULL;
+
unregister_netdev(bgmac->net_dev);
phy_disconnect(bgmac->net_dev->phydev);
netif_napi_del(&bgmac->napi);
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -388,6 +388,7 @@
#define BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII BIT(18)
#define BGMAC_FEAT_CC7_IF_TYPE_RGMII BIT(19)
#define BGMAC_FEAT_IDM_MASK BIT(20)
+#define BGMAC_FEAT_SRAB BIT(21)
struct bgmac_slot_info {
union {
@@ -493,6 +494,9 @@ struct bgmac {
void (*cmn_maskset32)(struct bgmac *bgmac, u16 offset, u32 mask,
u32 set);
int (*phy_connect)(struct bgmac *bgmac);
+
+ /* platform device for associated switch */
+ struct platform_device *b53_device;
};
struct bgmac *bgmac_alloc(struct device *dev);

View File

@ -0,0 +1,33 @@
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1088,6 +1088,7 @@ static const struct usb_device_id produc
{QMI_MATCH_FF_FF_FF(0x2c7c, 0x0620)}, /* Quectel EM160R-GL */
{QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */
{QMI_MATCH_FF_FF_FF(0x2c7c, 0x0801)}, /* Quectel RM520N */
+ {QMI_MATCH_FF_FF_FF(0x05c6, 0xf601)}, /* MeigLink SLM750 */
/* 3. Combined interface devices matching on interface number */
{QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -243,6 +243,8 @@ static void option_instat_callback(struc
#define UBLOX_PRODUCT_R6XX 0x90fa
/* These Yuga products use Qualcomm's vendor ID */
#define YUGA_PRODUCT_CLM920_NC5 0x9625
+/* These MeigLink products use Qualcomm's vendor ID */
+#define MEIGLINK_PRODUCT_SLM750 0xf601
#define QUECTEL_VENDOR_ID 0x2c7c
/* These Quectel products use Quectel's vendor ID */
@@ -1143,6 +1145,11 @@ static const struct usb_device_id option
.driver_info = ZLP },
{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96),
.driver_info = RSVD(4) },
+ /* Meiglink products using Qualcomm vendor ID */
+ // Works OK. In case of some issues check macros that are used by Quectel Products
+ { USB_DEVICE_AND_INTERFACE_INFO(QUALCOMM_VENDOR_ID, MEIGLINK_PRODUCT_SLM750, 0xff, 0xff, 0xff),
+ .driver_info = NUMEP2 },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUALCOMM_VENDOR_ID, MEIGLINK_PRODUCT_SLM750, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff),
.driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) },

View File

@ -0,0 +1,162 @@
From cc809a441d8f2924f785eb863dfa6aef47a25b0b Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Tue, 12 Aug 2014 20:49:27 +0200
Subject: [PATCH 30/36] GPIO: add named gpio exports
Signed-off-by: John Crispin <blogic@openwrt.org>
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -19,6 +19,8 @@
#include <linux/pinctrl/pinctrl.h>
#include <linux/slab.h>
#include <linux/gpio/machine.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
#include "gpiolib.h"
#include "gpiolib-of.h"
@@ -1066,3 +1068,72 @@ void of_gpio_dev_init(struct gpio_chip *
else
gc->of_node = gdev->dev.of_node;
}
+
+#ifdef CONFIG_GPIO_SYSFS
+
+static struct of_device_id gpio_export_ids[] = {
+ { .compatible = "gpio-export" },
+ { /* sentinel */ }
+};
+
+static int of_gpio_export_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct device_node *cnp;
+ u32 val;
+ int nb = 0;
+
+ for_each_child_of_node(np, cnp) {
+ const char *name = NULL;
+ int gpio;
+ bool dmc;
+ int max_gpio = 1;
+ int i;
+
+ of_property_read_string(cnp, "gpio-export,name", &name);
+
+ if (!name)
+ max_gpio = of_gpio_count(cnp);
+
+ for (i = 0; i < max_gpio; i++) {
+ unsigned flags = 0;
+ enum of_gpio_flags of_flags;
+
+ gpio = of_get_gpio_flags(cnp, i, &of_flags);
+ if (!gpio_is_valid(gpio))
+ return gpio;
+
+ if (of_flags == OF_GPIO_ACTIVE_LOW)
+ flags |= GPIOF_ACTIVE_LOW;
+
+ if (!of_property_read_u32(cnp, "gpio-export,output", &val))
+ flags |= val ? GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW;
+ else
+ flags |= GPIOF_IN;
+
+ if (devm_gpio_request_one(&pdev->dev, gpio, flags, name ? name : of_node_full_name(np)))
+ continue;
+
+ dmc = of_property_read_bool(cnp, "gpio-export,direction_may_change");
+ gpio_export_with_name(gpio, dmc, name);
+ nb++;
+ }
+ }
+
+ dev_info(&pdev->dev, "%d gpio(s) exported\n", nb);
+
+ return 0;
+}
+
+static struct platform_driver gpio_export_driver = {
+ .driver = {
+ .name = "gpio-export",
+ .owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(gpio_export_ids),
+ },
+ .probe = of_gpio_export_probe,
+};
+
+module_platform_driver(gpio_export_driver);
+
+#endif
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -125,6 +125,12 @@ static inline int gpio_export(unsigned g
return gpiod_export(gpio_to_desc(gpio), direction_may_change);
}
+int __gpiod_export(struct gpio_desc *desc, bool direction_may_change, const char *name);
+static inline int gpio_export_with_name(unsigned gpio, bool direction_may_change, const char *name)
+{
+ return __gpiod_export(gpio_to_desc(gpio), direction_may_change, name);
+}
+
static inline int gpio_export_link(struct device *dev, const char *name,
unsigned gpio)
{
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -728,6 +728,7 @@ static inline struct gpio_desc *acpi_get
#if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS)
+int _gpiod_export(struct gpio_desc *desc, bool direction_may_change, const char *name);
int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
int gpiod_export_link(struct device *dev, const char *name,
struct gpio_desc *desc);
@@ -735,6 +736,13 @@ void gpiod_unexport(struct gpio_desc *de
#else /* CONFIG_GPIOLIB && CONFIG_GPIO_SYSFS */
+static inline int _gpiod_export(struct gpio_desc *desc,
+ bool direction_may_change,
+ const char *name)
+{
+ return -ENOSYS;
+}
+
static inline int gpiod_export(struct gpio_desc *desc,
bool direction_may_change)
{
--- a/drivers/gpio/gpiolib-sysfs.c
+++ b/drivers/gpio/gpiolib-sysfs.c
@@ -544,7 +544,7 @@ static struct class gpio_class = {
*
* Returns zero on success, else an error.
*/
-int gpiod_export(struct gpio_desc *desc, bool direction_may_change)
+int __gpiod_export(struct gpio_desc *desc, bool direction_may_change, const char *name)
{
struct gpio_chip *chip;
struct gpio_device *gdev;
@@ -606,6 +606,8 @@ int gpiod_export(struct gpio_desc *desc,
offset = gpio_chip_hwgpio(desc);
if (chip->names && chip->names[offset])
ioname = chip->names[offset];
+ if (name)
+ ioname = name;
dev = device_create_with_groups(&gpio_class, &gdev->dev,
MKDEV(0, 0), data, gpio_groups,
@@ -627,6 +629,12 @@ err_unlock:
gpiod_dbg(desc, "%s: status %d\n", __func__, status);
return status;
}
+EXPORT_SYMBOL_GPL(__gpiod_export);
+
+int gpiod_export(struct gpio_desc *desc, bool direction_may_change)
+{
+ return __gpiod_export(desc, direction_may_change, NULL);
+}
EXPORT_SYMBOL_GPL(gpiod_export);
static int match_export(struct device *dev, const void *desc)

View File

@ -0,0 +1,408 @@
From 9e3f1d0805b2d919904dd9a4ff0d956314cc3cba Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 8 Jul 2017 08:20:09 +0200
Subject: debloat: procfs
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
fs/locks.c | 2 ++
fs/proc/Kconfig | 5 +++++
fs/proc/consoles.c | 3 +++
fs/proc/proc_tty.c | 11 ++++++++++-
include/net/snmp.h | 18 +++++++++++++++++-
ipc/msg.c | 3 +++
ipc/sem.c | 2 ++
ipc/shm.c | 2 ++
ipc/util.c | 3 +++
kernel/exec_domain.c | 2 ++
kernel/irq/proc.c | 9 +++++++++
kernel/time/timer_list.c | 2 ++
mm/vmalloc.c | 2 ++
mm/vmstat.c | 8 +++++---
net/8021q/vlanproc.c | 6 ++++++
net/core/net-procfs.c | 18 ++++++++++++------
net/core/sock.c | 2 ++
net/ipv4/fib_trie.c | 18 ++++++++++++------
net/ipv4/proc.c | 3 +++
net/ipv4/route.c | 3 +++
20 files changed, 105 insertions(+), 17 deletions(-)
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2885,6 +2885,8 @@ static const struct seq_operations locks
static int __init proc_locks_init(void)
{
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return 0;
proc_create_seq_private("locks", 0, NULL, &locks_seq_operations,
sizeof(struct locks_iterator), NULL);
return 0;
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -100,6 +100,11 @@ config PROC_CHILDREN
Say Y if you are running any user-space software which takes benefit from
this interface. For example, rkt is such a piece of software.
+config PROC_STRIPPED
+ default n
+ depends on EXPERT
+ bool "Strip non-essential /proc functionality to reduce code size"
+
config PROC_PID_ARCH_STATUS
def_bool n
depends on PROC_FS
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -92,6 +92,9 @@ static const struct seq_operations conso
static int __init proc_consoles_init(void)
{
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return 0;
+
proc_create_seq("consoles", 0, NULL, &consoles_op);
return 0;
}
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -131,7 +131,10 @@ static const struct seq_operations tty_d
void proc_tty_register_driver(struct tty_driver *driver)
{
struct proc_dir_entry *ent;
-
+
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return;
+
if (!driver->driver_name || driver->proc_entry ||
!driver->ops->proc_show)
return;
@@ -148,6 +151,9 @@ void proc_tty_unregister_driver(struct t
{
struct proc_dir_entry *ent;
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return;
+
ent = driver->proc_entry;
if (!ent)
return;
@@ -162,6 +168,9 @@ void proc_tty_unregister_driver(struct t
*/
void __init proc_tty_init(void)
{
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return;
+
if (!proc_mkdir("tty", NULL))
return;
proc_mkdir("tty/ldisc", NULL); /* Preserved: it's userspace visible */
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -124,6 +124,21 @@ struct linux_tls_mib {
#define DECLARE_SNMP_STAT(type, name) \
extern __typeof__(type) __percpu *name
+#ifdef CONFIG_PROC_STRIPPED
+#define __SNMP_STATS_DUMMY(mib) \
+ do { (void) mib->mibs[0]; } while(0)
+
+#define __SNMP_INC_STATS(mib, field) __SNMP_STATS_DUMMY(mib)
+#define SNMP_INC_STATS_ATOMIC_LONG(mib, field) __SNMP_STATS_DUMMY(mib)
+#define SNMP_INC_STATS(mib, field) __SNMP_STATS_DUMMY(mib)
+#define SNMP_DEC_STATS(mib, field) __SNMP_STATS_DUMMY(mib)
+#define __SNMP_ADD_STATS(mib, field, addend) __SNMP_STATS_DUMMY(mib)
+#define SNMP_ADD_STATS(mib, field, addend) __SNMP_STATS_DUMMY(mib)
+#define SNMP_UPD_PO_STATS(mib, basefield, addend) __SNMP_STATS_DUMMY(mib)
+#define __SNMP_UPD_PO_STATS(mib, basefield, addend) __SNMP_STATS_DUMMY(mib)
+
+#else
+
#define __SNMP_INC_STATS(mib, field) \
__this_cpu_inc(mib->mibs[field])
@@ -154,8 +169,9 @@ struct linux_tls_mib {
__this_cpu_add(ptr[basefield##OCTETS], addend); \
} while (0)
+#endif
-#if BITS_PER_LONG==32
+#if (BITS_PER_LONG==32) && !defined(CONFIG_PROC_STRIPPED)
#define __SNMP_ADD_STATS64(mib, field, addend) \
do { \
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -1350,6 +1350,9 @@ void __init msg_init(void)
{
msg_init_ns(&init_ipc_ns);
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return;
+
ipc_init_proc_interface("sysvipc/msg",
" key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
IPC_MSG_IDS, sysvipc_msg_proc_show);
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -268,6 +268,8 @@ void sem_exit_ns(struct ipc_namespace *n
void __init sem_init(void)
{
sem_init_ns(&init_ipc_ns);
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return;
ipc_init_proc_interface("sysvipc/sem",
" key semid perms nsems uid gid cuid cgid otime ctime\n",
IPC_SEM_IDS, sysvipc_sem_proc_show);
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -154,6 +154,8 @@ pure_initcall(ipc_ns_init);
void __init shm_init(void)
{
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return;
ipc_init_proc_interface("sysvipc/shm",
#if BITS_PER_LONG <= 32
" key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -141,6 +141,9 @@ void __init ipc_init_proc_interface(cons
struct proc_dir_entry *pde;
struct ipc_proc_iface *iface;
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return;
+
iface = kmalloc(sizeof(*iface), GFP_KERNEL);
if (!iface)
return;
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -29,6 +29,8 @@ static int execdomains_proc_show(struct
static int __init proc_execdomains_init(void)
{
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return 0;
proc_create_single("execdomains", 0, NULL, execdomains_proc_show);
return 0;
}
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -341,6 +341,9 @@ void register_irq_proc(unsigned int irq,
void __maybe_unused *irqp = (void *)(unsigned long) irq;
char name [MAX_NAMELEN];
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED) && !IS_ENABLED(CONFIG_SMP))
+ return;
+
if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip))
return;
@@ -394,6 +397,9 @@ void unregister_irq_proc(unsigned int ir
{
char name [MAX_NAMELEN];
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED) && !IS_ENABLED(CONFIG_SMP))
+ return;
+
if (!root_irq_dir || !desc->dir)
return;
#ifdef CONFIG_SMP
@@ -432,6 +438,9 @@ void init_irq_proc(void)
unsigned int irq;
struct irq_desc *desc;
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED) && !IS_ENABLED(CONFIG_SMP))
+ return;
+
/* create /proc/irq */
root_irq_dir = proc_mkdir("irq", NULL);
if (!root_irq_dir)
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -350,6 +350,8 @@ static int __init init_timer_list_procfs
{
struct proc_dir_entry *pe;
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return 0;
pe = proc_create_seq_private("timer_list", 0400, NULL, &timer_list_sops,
sizeof(struct timer_list_iter), NULL);
if (!pe)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -4160,6 +4160,8 @@ static const struct seq_operations vmall
static int __init proc_vmalloc_init(void)
{
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return 0;
if (IS_ENABLED(CONFIG_NUMA))
proc_create_seq_private("vmallocinfo", 0400, NULL,
&vmalloc_op,
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -2127,10 +2127,12 @@ void __init init_mm_internals(void)
#endif
migrate_on_reclaim_init();
#ifdef CONFIG_PROC_FS
- proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
- proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) {
+ proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
+ proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
+ proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
+ }
proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
- proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
#endif
}
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -93,6 +93,9 @@ void vlan_proc_cleanup(struct net *net)
{
struct vlan_net *vn = net_generic(net, vlan_net_id);
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return;
+
if (vn->proc_vlan_conf)
remove_proc_entry(name_conf, vn->proc_vlan_dir);
@@ -112,6 +115,9 @@ int __net_init vlan_proc_init(struct net
{
struct vlan_net *vn = net_generic(net, vlan_net_id);
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return 0;
+
vn->proc_vlan_dir = proc_net_mkdir(net, name_root, net->proc_net);
if (!vn->proc_vlan_dir)
goto err;
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -319,10 +319,12 @@ static int __net_init dev_proc_net_init(
if (!proc_create_net("dev", 0444, net->proc_net, &dev_seq_ops,
sizeof(struct seq_net_private)))
goto out;
- if (!proc_create_seq("softnet_stat", 0444, net->proc_net,
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) &&
+ !proc_create_seq("softnet_stat", 0444, net->proc_net,
&softnet_seq_ops))
goto out_dev;
- if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops,
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) &&
+ !proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops,
sizeof(struct seq_net_private)))
goto out_softnet;
@@ -332,9 +334,11 @@ static int __net_init dev_proc_net_init(
out:
return rc;
out_ptype:
- remove_proc_entry("ptype", net->proc_net);
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED))
+ remove_proc_entry("ptype", net->proc_net);
out_softnet:
- remove_proc_entry("softnet_stat", net->proc_net);
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED))
+ remove_proc_entry("softnet_stat", net->proc_net);
out_dev:
remove_proc_entry("dev", net->proc_net);
goto out;
@@ -344,8 +348,10 @@ static void __net_exit dev_proc_net_exit
{
wext_proc_exit(net);
- remove_proc_entry("ptype", net->proc_net);
- remove_proc_entry("softnet_stat", net->proc_net);
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) {
+ remove_proc_entry("ptype", net->proc_net);
+ remove_proc_entry("softnet_stat", net->proc_net);
+ }
remove_proc_entry("dev", net->proc_net);
}
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -4005,6 +4005,8 @@ static __net_initdata struct pernet_oper
static int __init proto_init(void)
{
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return 0;
return register_pernet_subsys(&proto_net_ops);
}
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -3029,11 +3029,13 @@ static const struct seq_operations fib_r
int __net_init fib_proc_init(struct net *net)
{
- if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) &&
+ !proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
sizeof(struct fib_trie_iter)))
goto out1;
- if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) &&
+ !proc_create_net_single("fib_triestat", 0444, net->proc_net,
fib_triestat_seq_show, NULL))
goto out2;
@@ -3044,17 +3046,21 @@ int __net_init fib_proc_init(struct net
return 0;
out3:
- remove_proc_entry("fib_triestat", net->proc_net);
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED))
+ remove_proc_entry("fib_triestat", net->proc_net);
out2:
- remove_proc_entry("fib_trie", net->proc_net);
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED))
+ remove_proc_entry("fib_trie", net->proc_net);
out1:
return -ENOMEM;
}
void __net_exit fib_proc_exit(struct net *net)
{
- remove_proc_entry("fib_trie", net->proc_net);
- remove_proc_entry("fib_triestat", net->proc_net);
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) {
+ remove_proc_entry("fib_trie", net->proc_net);
+ remove_proc_entry("fib_triestat", net->proc_net);
+ }
remove_proc_entry("route", net->proc_net);
}
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -553,5 +553,8 @@ static __net_initdata struct pernet_oper
int __init ip_misc_proc_init(void)
{
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return 0;
+
return register_pernet_subsys(&ip_proc_ops);
}
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -380,6 +380,9 @@ static struct pernet_operations ip_rt_pr
static int __init ip_rt_proc_init(void)
{
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
+ return 0;
+
return register_pernet_subsys(&ip_rt_proc_ops);
}

View File

@ -0,0 +1,93 @@
From e3692cb2fcd5ba1244512a0f43b8118f65f1c375 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 8 Jul 2017 08:20:43 +0200
Subject: debloat: dmabuf
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
drivers/base/Kconfig | 2 +-
drivers/dma-buf/Makefile | 10 +++++++---
drivers/dma-buf/dma-buf.c | 4 +++-
kernel/sched/core.c | 1 +
4 files changed, 12 insertions(+), 5 deletions(-)
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -198,7 +198,7 @@ config SOC_BUS
source "drivers/base/regmap/Kconfig"
config DMA_SHARED_BUFFER
- bool
+ tristate
default n
select IRQ_WORK
help
--- a/drivers/dma-buf/heaps/Makefile
+++ b/drivers/dma-buf/heaps/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_DMABUF_HEAPS_SYSTEM) += system_heap.o
-obj-$(CONFIG_DMABUF_HEAPS_CMA) += cma_heap.o
+dma-buf-objs-$(CONFIG_DMABUF_HEAPS_SYSTEM) += system_heap.o
+dma-buf-objs-$(CONFIG_DMABUF_HEAPS_CMA) += cma_heap.o
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,12 +1,14 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
+obj-$(CONFIG_DMA_SHARED_BUFFER) := dma-shared-buffer.o
+
+dma-buf-objs-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
dma-fence-unwrap.o dma-resv.o
-obj-$(CONFIG_DMABUF_HEAPS) += dma-heap.o
-obj-$(CONFIG_DMABUF_HEAPS) += heaps/
-obj-$(CONFIG_SYNC_FILE) += sync_file.o
-obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o
-obj-$(CONFIG_UDMABUF) += udmabuf.o
-obj-$(CONFIG_DMABUF_SYSFS_STATS) += dma-buf-sysfs-stats.o
+dma-buf-objs-$(CONFIG_DMABUF_HEAPS) += dma-heap.o
+obj-$(CONFIG_DMABUF_HEAPS) += heaps/
+dma-buf-objs-$(CONFIG_SYNC_FILE) += sync_file.o
+dma-buf-objs-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o
+dma-buf-objs-$(CONFIG_UDMABUF) += udmabuf.o
+dma-buf-objs-$(CONFIG_DMABUF_SYSFS_STATS) += udmabuf.o
dmabuf_selftests-y := \
selftest.o \
@@ -15,4 +17,6 @@ dmabuf_selftests-y := \
st-dma-fence-unwrap.o \
st-dma-resv.o
-obj-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o
+dma-buf-objs-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o
+
+dma-shared-buffer-objs := $(dma-buf-objs-y)
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1578,4 +1578,5 @@ static void __exit dma_buf_deinit(void)
kern_unmount(dma_buf_mnt);
dma_buf_uninit_sysfs_statistics();
}
-__exitcall(dma_buf_deinit);
+module_exit(dma_buf_deinit);
+MODULE_LICENSE("GPL");
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4319,6 +4319,7 @@ int wake_up_state(struct task_struct *p,
{
return try_to_wake_up(p, state, 0);
}
+EXPORT_SYMBOL_GPL(wake_up_state);
/*
* Perform scheduler related setup for a newly forked process p.
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -314,6 +314,7 @@ char *dynamic_dname(struct dentry *dentr
buffer += buflen - sz;
return memcpy(buffer, temp, sz);
}
+EXPORT_SYMBOL_GPL(dynamic_dname);
char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
{

View File

@ -0,0 +1,32 @@
From 0d37e6edc09c99e683dd91ca0e83bbc0df8477b3 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 16 Jul 2017 16:56:10 +0200
Subject: lib: add uevent_next_seqnum()
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
include/linux/kobject.h | 5 +++++
lib/kobject_uevent.c | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 42 insertions(+)
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -179,6 +179,18 @@ out:
return r;
}
+u64 uevent_next_seqnum(void)
+{
+ u64 seq;
+
+ mutex_lock(&uevent_sock_mutex);
+ seq = ++uevent_seqnum;
+ mutex_unlock(&uevent_sock_mutex);
+
+ return seq;
+}
+EXPORT_SYMBOL_GPL(uevent_next_seqnum);
+
/**
* kobject_synth_uevent - send synthetic uevent with arguments
*

View File

@ -0,0 +1,76 @@
From 0d37e6edc09c99e683dd91ca0e83bbc0df8477b3 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 16 Jul 2017 16:56:10 +0200
Subject: lib: add uevent_next_seqnum()
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
include/linux/kobject.h | 5 +++++
lib/kobject_uevent.c | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 42 insertions(+)
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -32,6 +32,8 @@
#define UEVENT_NUM_ENVP 64 /* number of env pointers */
#define UEVENT_BUFFER_SIZE 2048 /* buffer for the variables */
+struct sk_buff;
+
#ifdef CONFIG_UEVENT_HELPER
/* path to the userspace helper executed on an event */
extern char uevent_helper[];
@@ -224,4 +226,7 @@ int kobject_synth_uevent(struct kobject
__printf(2, 3)
int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...);
+int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group,
+ gfp_t allocation);
+
#endif /* _KOBJECT_H_ */
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -691,6 +691,43 @@ int add_uevent_var(struct kobj_uevent_en
EXPORT_SYMBOL_GPL(add_uevent_var);
#if defined(CONFIG_NET)
+int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group,
+ gfp_t allocation)
+{
+ struct uevent_sock *ue_sk;
+ int err = 0;
+
+ /* send netlink message */
+ mutex_lock(&uevent_sock_mutex);
+ list_for_each_entry(ue_sk, &uevent_sock_list, list) {
+ struct sock *uevent_sock = ue_sk->sk;
+ struct sk_buff *skb2;
+
+ skb2 = skb_clone(skb, allocation);
+ if (!skb2)
+ break;
+
+ err = netlink_broadcast(uevent_sock, skb2, pid, group,
+ allocation);
+ if (err)
+ break;
+ }
+ mutex_unlock(&uevent_sock_mutex);
+
+ kfree_skb(skb);
+ return err;
+}
+#else
+int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group,
+ gfp_t allocation)
+{
+ kfree_skb(skb);
+ return 0;
+}
+#endif
+EXPORT_SYMBOL_GPL(broadcast_uevent);
+
+#if defined(CONFIG_NET)
static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb,
struct netlink_ext_ack *extack)
{

View File

@ -0,0 +1,12 @@
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1179,6 +1179,9 @@ int __init early_init_dt_scan_chosen(cha
p = of_get_flat_dt_prop(node, "bootargs", &l);
if (p != NULL && l > 0)
strlcpy(cmdline, p, min(l, COMMAND_LINE_SIZE));
+ p = of_get_flat_dt_prop(node, "bootargs-append", &l);
+ if (p != NULL && l > 0)
+ strlcat(cmdline, p, min_t(int, strlen(cmdline) + (int)l, COMMAND_LINE_SIZE));
/*
* CONFIG_CMDLINE is meant to be a default in case nothing else

View File

@ -0,0 +1,352 @@
From 42824d4b753f84ccf885eca602c5037338b546c8 Mon Sep 17 00:00:00 2001
From: Zhi Chen <zhichen@codeaurora.org>
Date: Tue, 13 Jan 2015 14:28:18 -0800
Subject: [PATCH 3/3] net: conntrack events, support multiple registrant
Merging this patch from kernel 3.4:
This was supported by old (.28) kernel versions but removed
because of it's overhead.
But we need this feature for NA connection manager. Both ipv4
and ipv6 modules needs to register themselves to ct events.
Change-Id: Iebfb254590fb594f5baf232f849d1b7ae45ef757
Signed-off-by: Zhi Chen <zhichen@codeaurora.org>
---
include/net/netfilter/nf_conntrack_ecache.h | 15 ++-
include/net/netns/conntrack.h | 3 +
net/netfilter/Kconfig | 8 ++
net/netfilter/nf_conntrack_core.c | 4 +
net/netfilter/nf_conntrack_ecache.c | 103 +++++++++++++++++++-
net/netfilter/nf_conntrack_netlink.c | 17 ++++
6 files changed, 146 insertions(+), 4 deletions(-)
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -65,9 +65,14 @@ struct nf_ct_event_notifier {
int (*exp_event)(unsigned int events, const struct nf_exp_event *item);
};
-void nf_conntrack_register_notifier(struct net *net,
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+extern int nf_conntrack_register_notifier(struct net *net, struct notifier_block *nb);
+extern int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb);
+#else
+int nf_conntrack_register_notifier(struct net *net,
const struct nf_ct_event_notifier *nb);
void nf_conntrack_unregister_notifier(struct net *net);
+#endif
void nf_ct_deliver_cached_events(struct nf_conn *ct);
int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
@@ -98,11 +103,13 @@ static inline void
nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *e;
+#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ struct net *net = nf_ct_net(ct);
if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
return;
+#endif
e = nf_ct_ecache_find(ct);
if (e == NULL)
@@ -117,20 +124,34 @@ nf_conntrack_event_report(enum ip_conntr
u32 portid, int report)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- if (nf_ct_ecache_exist(ct))
- return nf_conntrack_eventmask_report(1 << event, ct, portid, report);
+#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ const struct net *net = nf_ct_net(ct);
+
+ if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
+ return 0;
#endif
+
+ return nf_conntrack_eventmask_report(1 << event, ct, portid, report);
+#else
return 0;
+#endif
}
static inline int
nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- if (nf_ct_ecache_exist(ct))
- return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
+#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ const struct net *net = nf_ct_net(ct);
+
+ if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
+ return 0;
#endif
+
+ return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
+#else
return 0;
+#endif
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -106,6 +106,9 @@ struct netns_ct {
u8 sysctl_checksum;
struct ip_conntrack_stat __percpu *stat;
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ struct atomic_notifier_head nf_conntrack_chain;
+#endif
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_ip_net nf_ct_proto;
#if defined(CONFIG_NF_CONNTRACK_LABELS)
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -161,6 +161,14 @@ config NF_CONNTRACK_EVENTS
If unsure, say `N'.
+config NF_CONNTRACK_CHAIN_EVENTS
+ bool "Register multiple callbacks to ct events"
+ depends on NF_CONNTRACK_EVENTS
+ help
+ Support multiple registrations.
+
+ If unsure, say `N'.
+
config NF_CONNTRACK_TIMEOUT
bool 'Connection tracking timeout'
depends on NETFILTER_ADVANCED
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2803,6 +2803,10 @@ int nf_conntrack_init_net(struct net *ne
nf_conntrack_ecache_pernet_init(net);
nf_conntrack_proto_pernet_init(net);
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ ATOMIC_INIT_NOTIFIER_HEAD(&net->ct.nf_conntrack_chain);
+#endif
+
return 0;
err_expect:
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -17,6 +17,9 @@
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/kernel.h>
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+#include <linux/notifier.h>
+#endif
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/export.h>
@@ -162,6 +165,35 @@ static int __nf_conntrack_eventmask_repo
return ret;
}
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
+ u32 portid, int report)
+{
+ struct nf_conntrack_ecache *e;
+ struct net *net = nf_ct_net(ct);
+
+ e = nf_ct_ecache_find(ct);
+ if (e == NULL)
+ return 0;
+
+ if (nf_ct_is_confirmed(ct)) {
+ struct nf_ct_event item = {
+ .ct = ct,
+ .portid = e->portid ? e->portid : portid,
+ .report = report
+ };
+ /* This is a resent of a destroy event? If so, skip missed */
+ unsigned long missed = e->portid ? 0 : e->missed;
+
+ if (!((eventmask | missed) & e->ctmask))
+ return 0;
+
+ atomic_notifier_call_chain(&net->ct.nf_conntrack_chain, eventmask | missed, &item);
+ }
+
+ return 0;
+}
+#else
int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
u32 portid, int report)
{
@@ -197,10 +229,52 @@ int nf_conntrack_eventmask_report(unsign
return ret;
}
+#endif
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+void nf_ct_deliver_cached_events(struct nf_conn *ct)
+{
+ unsigned long events, missed;
+ struct nf_conntrack_ecache *e;
+ struct nf_ct_event item;
+ struct net *net = nf_ct_net(ct);
+
+ e = nf_ct_ecache_find(ct);
+ if (e == NULL)
+ return;
+
+ events = xchg(&e->cache, 0);
+
+ if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events)
+ return;
+
+ /* We make a copy of the missed event cache without taking
+ * the lock, thus we may send missed events twice. However,
+ * this does not harm and it happens very rarely. */
+ missed = e->missed;
+
+ if (!((events | missed) & e->ctmask))
+ return;
+
+ item.ct = ct;
+ item.portid = 0;
+ item.report = 0;
+
+ atomic_notifier_call_chain(&net->ct.nf_conntrack_chain,
+ events | missed,
+ &item);
+
+ if (likely(!missed))
+ return;
+
+ spin_lock_bh(&ct->lock);
+ e->missed &= ~missed;
+ spin_unlock_bh(&ct->lock);
+}
+#else
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
struct nf_conntrack_ecache *e;
@@ -226,6 +300,7 @@ void nf_ct_deliver_cached_events(struct
*/
__nf_conntrack_eventmask_report(e, events, e->missed, &item);
}
+#endif
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
@@ -258,20 +333,43 @@ out_unlock:
rcu_read_unlock();
}
-void nf_conntrack_register_notifier(struct net *net,
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+int nf_conntrack_register_notifier(struct net *net,
+ struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&net->ct.nf_conntrack_chain, nb);
+}
+#else
+int nf_conntrack_register_notifier(struct net *net,
const struct nf_ct_event_notifier *new)
{
+ int ret;
struct nf_ct_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
WARN_ON_ONCE(notify);
+ if (notify != NULL) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
- mutex_unlock(&nf_ct_ecache_mutex);
+ ret = 0;
+out_unlock:
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
}
+#endif
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&net->ct.nf_conntrack_chain, nb);
+}
+#else
void nf_conntrack_unregister_notifier(struct net *net)
{
mutex_lock(&nf_ct_ecache_mutex);
@@ -279,6 +377,7 @@ void nf_conntrack_unregister_notifier(st
mutex_unlock(&nf_ct_ecache_mutex);
/* synchronize_rcu() is called after netns pre_exit */
}
+#endif
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -712,12 +712,19 @@ static size_t ctnetlink_nlmsg_size(const
}
static int
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ctnetlink_conntrack_event(struct notifier_block *this, unsigned long events, void *ptr)
+#else
ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
+#endif
{
const struct nf_conntrack_zone *zone;
struct net *net;
struct nlmsghdr *nlh;
struct nlattr *nest_parms;
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ struct nf_ct_event *item = (struct nf_ct_event *)ptr;
+#endif
struct nf_conn *ct = item->ct;
struct sk_buff *skb;
unsigned int type;
@@ -3745,11 +3752,17 @@ static int ctnetlink_stat_exp_cpu(struct
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+static struct notifier_block ctnl_notifier = {
+ .notifier_call = ctnetlink_conntrack_event
+};
+#else
static struct nf_ct_event_notifier ctnl_notifier = {
.ct_event = ctnetlink_conntrack_event,
.exp_event = ctnetlink_expect_event,
};
#endif
+#endif
static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
[IPCTNL_MSG_CT_NEW] = {
@@ -3848,8 +3861,12 @@ static int __net_init ctnetlink_net_init
static void ctnetlink_net_pre_exit(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ nf_conntrack_unregister_notifier(net,&ctnl_notifier);
+#else
nf_conntrack_unregister_notifier(net);
#endif
+#endif
}
static struct pernet_operations ctnetlink_net_ops = {

View File

@ -0,0 +1,204 @@
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -69,6 +69,9 @@ void brioctl_set(int (*hook)(struct net
int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
struct ifreq *ifr, void __user *uarg);
+extern void br_dev_update_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *nlstats);
+
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list);
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -979,6 +979,10 @@ struct sk_buff {
__u8 csum_not_inet:1;
__u8 scm_io_uring:1;
+#ifdef CONFIG_SHORTCUT_FE
+ __u8 fast_forwarded:1;
+#endif
+
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
#endif
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -18,6 +18,10 @@ struct timer_list {
void (*function)(struct timer_list *);
u32 flags;
+#ifdef CONFIG_SHORTCUT_FE
+ unsigned long cust_data;
+#endif
+
#ifdef CONFIG_LOCKDEP
struct lockdep_map lockdep_map;
#endif
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -68,6 +68,8 @@ struct nf_ct_event_notifier {
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
extern int nf_conntrack_register_notifier(struct net *net, struct notifier_block *nb);
extern int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb);
+extern int nf_conntrack_register_chain_notifier(struct net *net, struct notifier_block *nb);
+extern int nf_conntrack_unregister_chain_notifier(struct net *net, struct notifier_block *nb);
#else
int nf_conntrack_register_notifier(struct net *net,
const struct nf_ct_event_notifier *nb);
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -460,6 +460,9 @@ config FAILOVER
migration of VMs with direct attached VFs by failing over to the
paravirtual datapath when the VF is unplugged.
+config SHORTCUT_FE
+ bool "Enables kernel network stack path for Shortcut Forwarding Engine"
+
config ETHTOOL_NETLINK
bool "Netlink interface for ethtool"
default y
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -774,6 +774,28 @@ void br_port_flags_change(struct net_bri
br_recalculate_neigh_suppress_enabled(br);
}
+void br_dev_update_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *nlstats)
+{
+
+ struct pcpu_sw_netstats *stats;
+
+ /* Is this a bridge? */
+ if (!(dev->priv_flags & IFF_EBRIDGE))
+ return;
+
+
+ stats = this_cpu_ptr(dev->tstats);
+
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_add(&stats->rx_packets, nlstats->rx_packets);
+ u64_stats_add(&stats->rx_bytes, nlstats->rx_bytes);
+ u64_stats_add(&stats->tx_packets, nlstats->tx_packets);
+ u64_stats_add(&stats->tx_bytes, nlstats->tx_bytes);
+ u64_stats_update_end(&stats->syncp);
+}
+EXPORT_SYMBOL_GPL(br_dev_update_stats);
+
bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
{
struct net_bridge_port *p;
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3581,9 +3581,17 @@ static int xmit_one(struct sk_buff *skb,
{
unsigned int len;
int rc;
-
+#ifdef CONFIG_SHORTCUT_FE
+ /* If this skb has been fast forwarded then we don't want it to
+ * go to any taps (by definition we're trying to bypass them).
+ */
+ if (!skb->fast_forwarded) {
+#endif
if (dev_nit_active(dev))
dev_queue_xmit_nit(skb, dev);
+#ifdef CONFIG_SHORTCUT_FE
+ }
+#endif
len = skb->len;
trace_net_dev_start_xmit(skb, dev);
@@ -5233,6 +5241,11 @@ void netdev_rx_handler_unregister(struct
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+#ifdef CONFIG_SHORTCUT_FE
+int (*athrs_fast_nat_recv)(struct sk_buff *skb) __rcu __read_mostly;
+EXPORT_SYMBOL_GPL(athrs_fast_nat_recv);
+#endif
+
/*
* Limit the use of PFMEMALLOC reserves to those protocols that implement
* the special handling of PFMEMALLOC skbs.
@@ -5281,6 +5294,10 @@ static int __netif_receive_skb_core(stru
int ret = NET_RX_DROP;
__be16 type;
+#ifdef CONFIG_SHORTCUT_FE
+ int (*fast_recv)(struct sk_buff *skb);
+#endif
+
net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_receive_skb(skb);
@@ -5318,6 +5335,15 @@ another_round:
goto out;
}
+#ifdef CONFIG_SHORTCUT_FE
+ fast_recv = rcu_dereference(athrs_fast_nat_recv);
+ if (fast_recv) {
+ if (fast_recv(skb)) {
+ ret = NET_RX_SUCCESS;
+ goto out;
+ }
+ }
+#endif
if (skb_skip_tc_classify(skb))
goto skip_classify;
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -143,12 +143,23 @@ static int __nf_conntrack_eventmask_repo
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
- if (!notify) {
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ if (!notify && !rcu_dereference_raw(net->ct.nf_conntrack_chain.head))
+#else
+ if (!notify)
+#endif
+ {
rcu_read_unlock();
return 0;
}
-
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ ret = atomic_notifier_call_chain(&net->ct.nf_conntrack_chain,
+ events | missed, &item);
+ if (notify)
+ ret = notify->ct_event(events | missed, item);
+#else
ret = notify->ct_event(events | missed, item);
+#endif
rcu_read_unlock();
if (likely(ret >= 0 && missed == 0))
@@ -339,6 +350,11 @@ int nf_conntrack_register_notifier(struc
{
return atomic_notifier_chain_register(&net->ct.nf_conntrack_chain, nb);
}
+int nf_conntrack_register_chain_notifier(struct net *net, struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&net->ct.nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_register_chain_notifier);
#else
int nf_conntrack_register_notifier(struct net *net,
const struct nf_ct_event_notifier *new)
@@ -369,6 +385,11 @@ int nf_conntrack_unregister_notifier(str
{
return atomic_notifier_chain_unregister(&net->ct.nf_conntrack_chain, nb);
}
+int nf_conntrack_unregister_chain_notifier(struct net *net, struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&net->ct.nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_chain_notifier);
#else
void nf_conntrack_unregister_notifier(struct net *net)
{

View File

@ -0,0 +1,12 @@
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -956,6 +956,9 @@ static int wireless_process_ioctl(struct
else if (private)
return private(dev, iwr, cmd, info, handler);
}
+ /* Old driver API : call driver ioctl handler */
+ if (dev->netdev_ops->ndo_do_ioctl)
+ return dev->netdev_ops->ndo_do_ioctl(dev, (struct ifreq *) iwr, cmd);
return -EOPNOTSUPP;
}

View File

@ -0,0 +1,29 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Oct 2020 22:00:03 +0200
Subject: [PATCH] compiler.h: only include asm/rwonce.h for kernel code
This header file is not in uapi, which makes any user space code that includes
linux/compiler.h to fail with the error 'asm/rwonce.h: No such file or directory'
Fixes: e506ea451254 ("compiler.h: Split {READ,WRITE}_ONCE definitions out into rwonce.h")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -213,6 +213,8 @@ void ftrace_likely_update(struct ftrace_
#define function_nocfi(x) (x)
#endif
+#include <asm/rwonce.h>
+
#endif /* __KERNEL__ */
/*
@@ -251,6 +253,4 @@ static inline void *offset_to_ptr(const
*/
#define prevent_tail_call_optimization() mb()
-#include <asm/rwonce.h>
-
#endif /* __LINUX_COMPILER_H */

View File

@ -0,0 +1,11 @@
--- a/include/uapi/linux/swab.h
+++ b/include/uapi/linux/swab.h
@@ -3,7 +3,7 @@
#define _UAPI_LINUX_SWAB_H
#include <linux/types.h>
-#include <linux/compiler.h>
+#include <linux/stddef.h>
#include <asm/bitsperlong.h>
#include <asm/swab.h>

View File

@ -0,0 +1,57 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 18 Apr 2018 10:50:05 +0200
Subject: [PATCH] MIPS: only process negative stack offsets on stack traces
Fixes endless back traces in cases where the compiler emits a stack
pointer increase in a branch delay slot (probably for some form of
function return).
[ 3.475442] BUG: MAX_STACK_TRACE_ENTRIES too low!
[ 3.480070] turning off the locking correctness validator.
[ 3.485521] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.14.34 #0
[ 3.491475] Stack : 00000000 00000000 00000000 00000000 80e0fce2 00000034 00000000 00000000
[ 3.499764] 87c3838c 80696377 8061047c 00000000 00000001 00000001 87c2d850 6534689f
[ 3.508059] 00000000 00000000 80e10000 00000000 00000000 000000cf 0000000f 00000000
[ 3.516353] 00000000 806a0000 00076891 00000000 00000000 00000000 ffffffff 00000000
[ 3.524648] 806c0000 00000004 80e10000 806a0000 00000003 80690000 00000000 80700000
[ 3.532942] ...
[ 3.535362] Call Trace:
[ 3.537818] [<80010a48>] show_stack+0x58/0x100
[ 3.542207] [<804c2f78>] dump_stack+0xe8/0x170
[ 3.546613] [<80079f90>] save_trace+0xf0/0x110
[ 3.551010] [<8007b1ec>] mark_lock+0x33c/0x78c
[ 3.555413] [<8007bf48>] __lock_acquire+0x2ac/0x1a08
[ 3.560337] [<8007de60>] lock_acquire+0x64/0x8c
[ 3.564846] [<804e1570>] _raw_spin_lock_irqsave+0x54/0x78
[ 3.570186] [<801b618c>] kernfs_notify+0x94/0xac
[ 3.574770] [<801b7b10>] sysfs_notify+0x74/0xa0
[ 3.579257] [<801b618c>] kernfs_notify+0x94/0xac
[ 3.583839] [<801b7b10>] sysfs_notify+0x74/0xa0
[ 3.588329] [<801b618c>] kernfs_notify+0x94/0xac
[ 3.592911] [<801b7b10>] sysfs_notify+0x74/0xa0
[ 3.597401] [<801b618c>] kernfs_notify+0x94/0xac
[ 3.601983] [<801b7b10>] sysfs_notify+0x74/0xa0
[ 3.606473] [<801b618c>] kernfs_notify+0x94/0xac
[ 3.611055] [<801b7b10>] sysfs_notify+0x74/0xa0
[ 3.615545] [<801b618c>] kernfs_notify+0x94/0xac
[ 3.620125] [<801b7b10>] sysfs_notify+0x74/0xa0
[ 3.624619] [<801b618c>] kernfs_notify+0x94/0xac
[ 3.629197] [<801b7b10>] sysfs_notify+0x74/0xa0
[ 3.633691] [<801b618c>] kernfs_notify+0x94/0xac
[ 3.638269] [<801b7b10>] sysfs_notify+0x74/0xa0
[ 3.642763] [<801b618c>] kernfs_notify+0x94/0xac
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -394,6 +394,8 @@ static inline int is_sp_move_ins(union m
if (ip->i_format.opcode == addiu_op ||
ip->i_format.opcode == daddiu_op) {
+ if (ip->i_format.simmediate > 0)
+ return 0;
*frame_size = -ip->i_format.simmediate;
return 1;
}

View File

@ -0,0 +1,82 @@
From: Tobias Wolf <dev-NTEO@vplace.de>
Subject: mm: Fix alloc_node_mem_map with ARCH_PFN_OFFSET calculation
An rt288x (ralink) based router (Belkin F5D8235 v1) does not boot with any
kernel beyond version 4.3 resulting in:
BUG: Bad page state in process swapper pfn:086ac
bisect resulted in:
a1c34a3bf00af2cede839879502e12dc68491ad5 is the first bad commit
commit a1c34a3bf00af2cede839879502e12dc68491ad5
Author: Laura Abbott <laura@labbott.name>
Date: Thu Nov 5 18:48:46 2015 -0800
mm: Don't offset memmap for flatmem
Srinivas Kandagatla reported bad page messages when trying to remove the
bottom 2MB on an ARM based IFC6410 board
BUG: Bad page state in process swapper pfn:fffa8
page:ef7fb500 count:0 mapcount:0 mapping: (null) index:0x0
flags: 0x96640253(locked|error|dirty|active|arch_1|reclaim|mlocked)
page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set
bad because of flags:
flags: 0x200041(locked|active|mlocked)
Modules linked in:
CPU: 0 PID: 0 Comm: swapper Not tainted 3.19.0-rc3-00007-g412f9ba-dirty
#816
Hardware name: Qualcomm (Flattened Device Tree)
unwind_backtrace
show_stack
dump_stack
bad_page
free_pages_prepare
free_hot_cold_page
__free_pages
free_highmem_page
mem_init
start_kernel
Disabling lock debugging due to kernel taint
[...]
:040000 040000 2de013c372345fd471cd58f0553c9b38b0ef1cc4
0a8156f848733dfa21e16c196dfb6c0a76290709 M mm
This fix for ARM does not account ARCH_PFN_OFFSET for mem_map as later used by
page_to_pfn anymore.
The following output was generated with two hacked in printk statements:
printk("before %p vs. %p or %p\n", mem_map, mem_map - offset, mem_map -
(pgdat->node_start_pfn - ARCH_PFN_OFFSET));
if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
mem_map -= offset + (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
printk("after %p\n", mem_map);
Output:
[ 0.000000] before 8861b280 vs. 8861b280 or 8851b280
[ 0.000000] after 8851b280
As seen in the first line mem_map with subtraction of offset does not equal the
mem_map after subtraction of ARCH_PFN_OFFSET.
After adding the offset of ARCH_PFN_OFFSET as well to mem_map as the
previously calculated offset is zero for the named platform it is able to boot
4.4 and 4.9-rc7 again.
Signed-off-by: Tobias Wolf <dev-NTEO@vplace.de>
---
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7837,7 +7837,7 @@ static void __init alloc_node_mem_map(st
if (pgdat == NODE_DATA(0)) {
mem_map = NODE_DATA(0)->node_mem_map;
if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
- mem_map -= offset;
+ mem_map -= offset + (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
}
#endif
}

View File

@ -0,0 +1,81 @@
From: Felix Fietkau <nbd@nbd.name>
Subject: jffs2: use .rename2 and add RENAME_WHITEOUT support
It is required for renames on overlayfs
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -614,8 +614,8 @@ static int jffs2_rmdir (struct inode *di
return ret;
}
-static int jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i,
- struct dentry *dentry, umode_t mode, dev_t rdev)
+static int __jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i,
+ struct dentry *dentry, umode_t mode, dev_t rdev, bool whiteout)
{
struct jffs2_inode_info *f, *dir_f;
struct jffs2_sb_info *c;
@@ -754,7 +754,11 @@ static int jffs2_mknod (struct user_name
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- d_instantiate_new(dentry, inode);
+ if (!whiteout)
+ d_instantiate_new(dentry, inode);
+ else
+ unlock_new_inode(inode);
+
return 0;
fail:
@@ -762,6 +766,19 @@ static int jffs2_mknod (struct user_name
return ret;
}
+static int jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
+{
+ return __jffs2_mknod(mnt_userns, dir_i, dentry, mode, rdev, false);
+}
+
+static int jffs2_whiteout (struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry)
+{
+ return __jffs2_mknod(mnt_userns, old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE,
+ WHITEOUT_DEV, true);
+}
+
static int jffs2_rename (struct user_namespace *mnt_userns,
struct inode *old_dir_i, struct dentry *old_dentry,
struct inode *new_dir_i, struct dentry *new_dentry,
@@ -773,7 +790,7 @@ static int jffs2_rename (struct user_nam
uint8_t type;
uint32_t now;
- if (flags & ~RENAME_NOREPLACE)
+ if (flags & ~(RENAME_NOREPLACE|RENAME_WHITEOUT))
return -EINVAL;
/* The VFS will check for us and prevent trying to rename a
@@ -839,9 +856,14 @@ static int jffs2_rename (struct user_nam
if (d_is_dir(old_dentry) && !victim_f)
inc_nlink(new_dir_i);
- /* Unlink the original */
- ret = jffs2_do_unlink(c, JFFS2_INODE_INFO(old_dir_i),
- old_dentry->d_name.name, old_dentry->d_name.len, NULL, now);
+ if (flags & RENAME_WHITEOUT)
+ /* Replace with whiteout */
+ ret = jffs2_whiteout(mnt_userns, old_dir_i, old_dentry);
+ else
+ /* Unlink the original */
+ ret = jffs2_do_unlink(c, JFFS2_INODE_INFO(old_dir_i),
+ old_dentry->d_name.name,
+ old_dentry->d_name.len, NULL, now);
/* We don't touch inode->i_nlink */

View File

@ -0,0 +1,73 @@
From: Felix Fietkau <nbd@nbd.name>
Subject: jffs2: add RENAME_EXCHANGE support
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -787,18 +787,31 @@ static int jffs2_rename (struct user_nam
int ret;
struct jffs2_sb_info *c = JFFS2_SB_INFO(old_dir_i->i_sb);
struct jffs2_inode_info *victim_f = NULL;
+ struct inode *fst_inode = d_inode(old_dentry);
+ struct inode *snd_inode = d_inode(new_dentry);
uint8_t type;
uint32_t now;
- if (flags & ~(RENAME_NOREPLACE|RENAME_WHITEOUT))
+ if (flags & ~(RENAME_NOREPLACE|RENAME_WHITEOUT|RENAME_EXCHANGE))
return -EINVAL;
+ if ((flags & RENAME_EXCHANGE) && (old_dir_i != new_dir_i)) {
+ if (S_ISDIR(fst_inode->i_mode) && !S_ISDIR(snd_inode->i_mode)) {
+ inc_nlink(new_dir_i);
+ drop_nlink(old_dir_i);
+ }
+ else if (!S_ISDIR(fst_inode->i_mode) && S_ISDIR(snd_inode->i_mode)) {
+ drop_nlink(new_dir_i);
+ inc_nlink(old_dir_i);
+ }
+ }
+
/* The VFS will check for us and prevent trying to rename a
* file over a directory and vice versa, but if it's a directory,
* the VFS can't check whether the victim is empty. The filesystem
* needs to do that for itself.
*/
- if (d_really_is_positive(new_dentry)) {
+ if (d_really_is_positive(new_dentry) && !(flags & RENAME_EXCHANGE)) {
victim_f = JFFS2_INODE_INFO(d_inode(new_dentry));
if (d_is_dir(new_dentry)) {
struct jffs2_full_dirent *fd;
@@ -833,7 +846,7 @@ static int jffs2_rename (struct user_nam
if (ret)
return ret;
- if (victim_f) {
+ if (victim_f && !(flags & RENAME_EXCHANGE)) {
/* There was a victim. Kill it off nicely */
if (d_is_dir(new_dentry))
clear_nlink(d_inode(new_dentry));
@@ -859,6 +872,12 @@ static int jffs2_rename (struct user_nam
if (flags & RENAME_WHITEOUT)
/* Replace with whiteout */
ret = jffs2_whiteout(mnt_userns, old_dir_i, old_dentry);
+ else if (flags & RENAME_EXCHANGE)
+ /* Replace the original */
+ ret = jffs2_do_link(c, JFFS2_INODE_INFO(old_dir_i),
+ d_inode(new_dentry)->i_ino, type,
+ old_dentry->d_name.name, old_dentry->d_name.len,
+ now);
else
/* Unlink the original */
ret = jffs2_do_unlink(c, JFFS2_INODE_INFO(old_dir_i),
@@ -890,7 +909,7 @@ static int jffs2_rename (struct user_nam
return ret;
}
- if (d_is_dir(old_dentry))
+ if (d_is_dir(old_dentry) && !(flags & RENAME_EXCHANGE))
drop_nlink(old_dir_i);
new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = ITIME(now);

View File

@ -0,0 +1,20 @@
From: Felix Fietkau <nbd@nbd.name>
Subject: jffs2: add splice ops
Add splice_read using generic_file_splice_read.
Add splice_write using iter_file_splice_write
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -53,6 +53,8 @@ const struct file_operations jffs2_file_
.open = generic_file_open,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
+ .splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.unlocked_ioctl=jffs2_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs2_fsync,

View File

@ -0,0 +1,45 @@
From: Stephen Hemminger <stephen@networkplumber.org>
Subject: bridge: allow receiption on disabled port
When an ethernet device is enslaved to a bridge, and the bridge STP
detects loss of carrier (or operational state down), then normally
packet receiption is blocked.
This breaks control applications like WPA which maybe expecting to
receive packets to negotiate to bring link up. The bridge needs to
block forwarding packets from these disabled ports, but there is no
hard requirement to not allow local packet delivery.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -222,6 +222,9 @@ static void __br_handle_local_finish(str
/* note: already called with rcu_read_lock */
static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ struct net_bridge_port *p = br_port_get_rcu(skb->dev);
+
+ if (p->state != BR_STATE_DISABLED)
__br_handle_local_finish(skb);
/* return 1 to signal the okfn() was called so it's ok to use the skb */
@@ -390,6 +393,17 @@ forward:
goto defer_stp_filtering;
switch (p->state) {
+ case BR_STATE_DISABLED:
+ if (ether_addr_equal(p->br->dev->dev_addr, dest))
+ skb->pkt_type = PACKET_HOST;
+
+ if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
+ br_handle_local_finish) == 1) {
+ return RX_HANDLER_PASS;
+ }
+ break;
+
case BR_STATE_FORWARDING:
case BR_STATE_LEARNING:
defer_stp_filtering:

View File

@ -0,0 +1,94 @@
From: Daniel González Cabanelas <dgcbueu@gmail.com>
Subject: [PATCH 1/2] rtc: rs5c372: support alarms up to 1 week
The Ricoh R2221x, R2223x, RS5C372, RV5C387A chips can handle 1 week
alarms.
Read the "wday" alarm register and convert it to a date to support up 1
week in our driver.
Signed-off-by: Daniel González Cabanelas <dgcbueu@gmail.com>
---
drivers/rtc/rtc-rs5c372.c | 48 ++++++++++++++++++++++++++++++++++-----
1 file changed, 42 insertions(+), 6 deletions(-)
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -399,7 +399,9 @@ static int rs5c_read_alarm(struct device
{
struct i2c_client *client = to_i2c_client(dev);
struct rs5c372 *rs5c = i2c_get_clientdata(client);
- int status;
+ int status, wday_offs;
+ struct rtc_time rtc;
+ unsigned long alarm_secs;
status = rs5c_get_regs(rs5c);
if (status < 0)
@@ -409,6 +411,30 @@ static int rs5c_read_alarm(struct device
t->time.tm_sec = 0;
t->time.tm_min = bcd2bin(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]);
+ t->time.tm_wday = ffs(rs5c->regs[RS5C_REG_ALARM_A_WDAY] & 0x7f) - 1;
+
+ /* determine the day, month and year based on alarm wday, taking as a
+ * reference the current time from the rtc
+ */
+ status = rs5c372_rtc_read_time(dev, &rtc);
+ if (status < 0)
+ return status;
+
+ wday_offs = t->time.tm_wday - rtc.tm_wday;
+ alarm_secs = mktime64(rtc.tm_year + 1900,
+ rtc.tm_mon + 1,
+ rtc.tm_mday + wday_offs,
+ t->time.tm_hour,
+ t->time.tm_min,
+ t->time.tm_sec);
+
+ if (wday_offs < 0 || (wday_offs == 0 &&
+ (t->time.tm_hour < rtc.tm_hour ||
+ (t->time.tm_hour == rtc.tm_hour &&
+ t->time.tm_min <= rtc.tm_min))))
+ alarm_secs += 7 * 86400;
+
+ rtc_time64_to_tm(alarm_secs, &t->time);
/* ... and status */
t->enabled = !!(rs5c->regs[RS5C_REG_CTRL1] & RS5C_CTRL1_AALE);
@@ -423,12 +449,20 @@ static int rs5c_set_alarm(struct device
struct rs5c372 *rs5c = i2c_get_clientdata(client);
int status, addr, i;
unsigned char buf[3];
+ struct rtc_time rtc_tm;
+ unsigned long rtc_secs, alarm_secs;
- /* only handle up to 24 hours in the future, like RTC_ALM_SET */
- if (t->time.tm_mday != -1
- || t->time.tm_mon != -1
- || t->time.tm_year != -1)
+ /* chip only can handle alarms up to one week in the future*/
+ status = rs5c372_rtc_read_time(dev, &rtc_tm);
+ if (status)
+ return status;
+ rtc_secs = rtc_tm_to_time64(&rtc_tm);
+ alarm_secs = rtc_tm_to_time64(&t->time);
+ if (alarm_secs >= rtc_secs + 7 * 86400) {
+ dev_err(dev, "%s: alarm maximum is one week in the future (%d)\n",
+ __func__, status);
return -EINVAL;
+ }
/* REVISIT: round up tm_sec */
@@ -449,7 +483,9 @@ static int rs5c_set_alarm(struct device
/* set alarm */
buf[0] = bin2bcd(t->time.tm_min);
buf[1] = rs5c_hr2reg(rs5c, t->time.tm_hour);
- buf[2] = 0x7f; /* any/all days */
+ /* each bit is the day of the week, 0x7f means all days */
+ buf[2] = (t->time.tm_wday >= 0 && t->time.tm_wday < 7) ?
+ BIT(t->time.tm_wday) : 0x7f;
for (i = 0; i < sizeof(buf); i++) {
addr = RS5C_ADDR(RS5C_REG_ALARM_A_MIN + i);

View File

@ -0,0 +1,70 @@
From: Daniel González Cabanelas <dgcbueu@gmail.com>
Subject: [PATCH 2/2] rtc: rs5c372: let the alarm to be used as wakeup source
Currently there is no use for the interrupts on the rs5c372 RTC and the
wakealarm isn't enabled. There are some devices like NASes which use this
RTC to wake up from the power off state when the INTR pin is activated by
the alarm clock.
Enable the alarm and let to be used as a wakeup source.
Tested on a Buffalo LS421DE NAS.
Signed-off-by: Daniel González Cabanelas <dgcbueu@gmail.com>
---
drivers/rtc/rtc-rs5c372.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -833,6 +833,7 @@ static int rs5c372_probe(struct i2c_clie
int err = 0;
int smbus_mode = 0;
struct rs5c372 *rs5c372;
+ bool rs5c372_can_wakeup_device = false;
dev_dbg(&client->dev, "%s\n", __func__);
@@ -868,6 +869,12 @@ static int rs5c372_probe(struct i2c_clie
else
rs5c372->type = id->driver_data;
+#ifdef CONFIG_OF
+ if(of_property_read_bool(client->dev.of_node,
+ "wakeup-source"))
+ rs5c372_can_wakeup_device = true;
+#endif
+
/* we read registers 0x0f then 0x00-0x0f; skip the first one */
rs5c372->regs = &rs5c372->buf[1];
rs5c372->smbus = smbus_mode;
@@ -901,6 +908,8 @@ static int rs5c372_probe(struct i2c_clie
goto exit;
}
+ rs5c372->has_irq = 1;
+
/* if the oscillator lost power and no other software (like
* the bootloader) set it up, do it here.
*
@@ -927,6 +936,10 @@ static int rs5c372_probe(struct i2c_clie
);
/* REVISIT use client->irq to register alarm irq ... */
+ if (rs5c372_can_wakeup_device) {
+ device_init_wakeup(&client->dev, true);
+ }
+
rs5c372->rtc = devm_rtc_device_register(&client->dev,
rs5c372_driver.driver.name,
&rs5c372_rtc_ops, THIS_MODULE);
@@ -940,6 +953,9 @@ static int rs5c372_probe(struct i2c_clie
if (err)
goto exit;
+ /* the rs5c372 alarm only supports a minute accuracy */
+
+
return 0;
exit:

Some files were not shown because too many files have changed in this diff Show More