rockchip: drop broken drm backports from kernel 6.6

It will require too much effort to make the driver work in kernel 6.6.

Signed-off-by: Tianling Shen <cnsztl@immortalwrt.org>
Tianling Shen 2025-02-15 00:51:15 +08:00
parent 7c55524ec8
commit d04b0cbe39
102 changed files with 3 additions and 31284 deletions

@@ -358,7 +358,7 @@ $(eval $(call KernelPackage,drm-exec))
define KernelPackage/drm-gem-shmem-helper
SUBMENU:=$(VIDEO_MENU)
TITLE:=GEM shmem helper functions
-DEPENDS:=@DISPLAY_SUPPORT +kmod-drm @!LINUX_5_15
+DEPENDS:=@DISPLAY_SUPPORT +kmod-drm
KCONFIG:=CONFIG_DRM_GEM_SHMEM_HELPER
FILES:=$(LINUX_DIR)/drivers/gpu/drm/drm_shmem_helper.ko
AUTOLOAD:=$(call AutoProbe,drm_shmem_helper)
@@ -657,8 +657,8 @@ $(eval $(call KernelPackage,drm-panfrost))
define KernelPackage/drm-panthor
SUBMENU:=$(VIDEO_MENU)
TITLE:=DRM support for ARM Mali CSF-based GPUs
-DEPENDS:=@TARGET_rockchip +kmod-drm +kmod-drm-exec +kmod-drm-gem-shmem-helper \
-	panthor-firmware
+DEPENDS:=@LINUX_6_12 @TARGET_rockchip +kmod-drm +kmod-drm-exec \
+	+kmod-drm-gem-shmem-helper +panthor-firmware
KCONFIG:= \
CONFIG_DRM_GPUVM \
CONFIG_DRM_PANTHOR
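
The effect of the two DEPENDS changes above: with the 6.6 backports
removed, kmod-drm-panthor is only selectable when the target is built
against kernel 6.12 (the added @LINUX_6_12 guard), and the @!LINUX_5_15
guard on kmod-drm-gem-shmem-helper is dropped, presumably because 5.15
is no longer a kernel option for this target.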

@@ -1,88 +0,0 @@
From 3c3cfcb93f6e6e1cede0cdfe3ec24f16ee108929 Mon Sep 17 00:00:00 2001
From: Jagan Teki <jagan@edgeble.ai>
Date: Mon, 31 Jul 2023 16:30:04 +0530
Subject: [PATCH] drm/rockchip: vop: Add rv1126 vop_lite support
RV1126 VOP_LITE supports the video output processing of MIPI DSI,
RGB display interfaces with max output resolution of 1920x1080.
Add support for rv1126 vop.
Signed-off-by: Jagan Teki <jagan@edgeble.ai>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20230731110012.2913742-7-jagan@edgeble.ai
---
drivers/gpu/drm/rockchip/rockchip_vop_reg.c | 55 +++++++++++++++++++++
1 file changed, 55 insertions(+)
--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
@@ -1122,6 +1122,59 @@ static const struct vop_data rk3328_vop
.max_output = { 4096, 2160 },
};
+static const struct vop_common rv1126_common = {
+ .standby = VOP_REG_SYNC(PX30_SYS_CTRL2, 0x1, 1),
+ .out_mode = VOP_REG(PX30_DSP_CTRL2, 0xf, 16),
+ .dsp_blank = VOP_REG(PX30_DSP_CTRL2, 0x1, 14),
+ .dither_down_en = VOP_REG(PX30_DSP_CTRL2, 0x1, 8),
+ .dither_down_sel = VOP_REG(PX30_DSP_CTRL2, 0x1, 7),
+ .dither_down_mode = VOP_REG(PX30_DSP_CTRL2, 0x1, 6),
+ .cfg_done = VOP_REG_SYNC(PX30_REG_CFG_DONE, 0x1, 0),
+ .dither_up = VOP_REG(PX30_DSP_CTRL2, 0x1, 2),
+ .dsp_lut_en = VOP_REG(PX30_DSP_CTRL2, 0x1, 5),
+ .gate_en = VOP_REG(PX30_DSP_CTRL2, 0x1, 0),
+};
+
+static const struct vop_modeset rv1126_modeset = {
+ .htotal_pw = VOP_REG(PX30_DSP_HTOTAL_HS_END, 0x0fff0fff, 0),
+ .hact_st_end = VOP_REG(PX30_DSP_HACT_ST_END, 0x0fff0fff, 0),
+ .vtotal_pw = VOP_REG(PX30_DSP_VTOTAL_VS_END, 0x0fff0fff, 0),
+ .vact_st_end = VOP_REG(PX30_DSP_VACT_ST_END, 0x0fff0fff, 0),
+};
+
+static const struct vop_output rv1126_output = {
+ .rgb_dclk_pol = VOP_REG(PX30_DSP_CTRL0, 0x1, 1),
+ .rgb_pin_pol = VOP_REG(PX30_DSP_CTRL0, 0x7, 2),
+ .rgb_en = VOP_REG(PX30_DSP_CTRL0, 0x1, 0),
+ .mipi_dclk_pol = VOP_REG(PX30_DSP_CTRL0, 0x1, 25),
+ .mipi_pin_pol = VOP_REG(PX30_DSP_CTRL0, 0x7, 26),
+ .mipi_en = VOP_REG(PX30_DSP_CTRL0, 0x1, 24),
+};
+
+static const struct vop_misc rv1126_misc = {
+ .global_regdone_en = VOP_REG(PX30_SYS_CTRL2, 0x1, 13),
+};
+
+static const struct vop_win_data rv1126_vop_win_data[] = {
+ { .base = 0x00, .phy = &px30_win0_data,
+ .type = DRM_PLANE_TYPE_OVERLAY },
+ { .base = 0x00, .phy = &px30_win2_data,
+ .type = DRM_PLANE_TYPE_PRIMARY },
+};
+
+static const struct vop_data rv1126_vop = {
+ .version = VOP_VERSION(2, 0xb),
+ .intr = &px30_intr,
+ .common = &rv1126_common,
+ .modeset = &rv1126_modeset,
+ .output = &rv1126_output,
+ .misc = &rv1126_misc,
+ .win = rv1126_vop_win_data,
+ .win_size = ARRAY_SIZE(rv1126_vop_win_data),
+ .max_output = { 1920, 1080 },
+ .lut_size = 1024,
+};
+
static const struct of_device_id vop_driver_dt_match[] = {
{ .compatible = "rockchip,rk3036-vop",
.data = &rk3036_vop },
@@ -1149,6 +1202,8 @@ static const struct of_device_id vop_dri
.data = &rk3228_vop },
{ .compatible = "rockchip,rk3328-vop",
.data = &rk3328_vop },
+ { .compatible = "rockchip,rv1126-vop",
+ .data = &rv1126_vop },
{},
};
MODULE_DEVICE_TABLE(of, vop_driver_dt_match);
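
For readers skimming the register tables above: each
VOP_REG(reg, mask, shift) entry describes one bit field inside a VOP
register, which the driver updates with read-modify-write accesses. A
minimal sketch of that pattern (illustrative names, not the driver's
actual structs):

#include <linux/types.h>
#include <linux/io.h>

/* Illustrative field descriptor in the spirit of VOP_REG(reg, mask, shift). */
struct field_desc {
	u32 offset;	/* register offset, e.g. PX30_DSP_CTRL2 */
	u32 mask;	/* unshifted field mask, e.g. 0xf */
	u32 shift;	/* bit position of the field's LSB, e.g. 16 */
};

/* Update one field while leaving the register's other bits untouched. */
static void field_write(void __iomem *base, const struct field_desc *f, u32 val)
{
	u32 v = readl(base + f->offset);

	v &= ~(f->mask << f->shift);
	v |= (val & f->mask) << f->shift;
	writel(v, base + f->offset);
}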

@@ -1,60 +0,0 @@
From 11fdb231f4127bf60839a63a8c7ed640ebe4751a Mon Sep 17 00:00:00 2001
From: Jagan Teki <jagan@edgeble.ai>
Date: Mon, 31 Jul 2023 16:30:06 +0530
Subject: [PATCH] drm/rockchip: dsi: Add rv1126 MIPI DSI support
RV1126 MIPI DSI supports a V1.2 DPHY with 4 lanes and a 1 Gbps transfer
rate per lane.
Add support for it.
Signed-off-by: Jagan Teki <jagan@edgeble.ai>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20230731110012.2913742-9-jagan@edgeble.ai
---
.../gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 20 +++++++++++++++++++
1 file changed, 20 insertions(+)
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
@@ -197,6 +197,11 @@
#define RK3568_DSI1_TURNDISABLE BIT(2)
#define RK3568_DSI1_FORCERXMODE BIT(0)
+#define RV1126_GRF_DSIPHY_CON 0x10220
+#define RV1126_DSI_FORCETXSTOPMODE (0xf << 4)
+#define RV1126_DSI_TURNDISABLE BIT(2)
+#define RV1126_DSI_FORCERXMODE BIT(0)
+
#define HIWORD_UPDATE(val, mask) (val | (mask) << 16)
enum {
@@ -1650,6 +1655,18 @@ static const struct rockchip_dw_dsi_chip
{ /* sentinel */ }
};
+static const struct rockchip_dw_dsi_chip_data rv1126_chip_data[] = {
+ {
+ .reg = 0xffb30000,
+ .lanecfg1_grf_reg = RV1126_GRF_DSIPHY_CON,
+ .lanecfg1 = HIWORD_UPDATE(0, RV1126_DSI_TURNDISABLE |
+ RV1126_DSI_FORCERXMODE |
+ RV1126_DSI_FORCETXSTOPMODE),
+ .max_data_lanes = 4,
+ },
+ { /* sentinel */ }
+};
+
static const struct of_device_id dw_mipi_dsi_rockchip_dt_ids[] = {
{
.compatible = "rockchip,px30-mipi-dsi",
@@ -1663,6 +1680,9 @@ static const struct of_device_id dw_mipi
}, {
.compatible = "rockchip,rk3568-mipi-dsi",
.data = &rk3568_chip_data,
+ }, {
+ .compatible = "rockchip,rv1126-mipi-dsi",
+ .data = &rv1126_chip_data,
},
{ /* sentinel */ }
};
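
The HIWORD_UPDATE() macro above encodes the Rockchip GRF write
convention: bits [31:16] of the written word act as a write-enable mask
for bits [15:0], so individual fields can be changed without a
read-modify-write. A small standalone illustration of the value the
lanecfg1 initializer above produces (userspace demo, not driver code):

#include <stdio.h>

#define BIT(n)				(1u << (n))
#define RV1126_DSI_FORCETXSTOPMODE	(0xfu << 4)
#define RV1126_DSI_TURNDISABLE		BIT(2)
#define RV1126_DSI_FORCERXMODE		BIT(0)
#define HIWORD_UPDATE(val, mask)	(val | (mask) << 16)

int main(void)
{
	unsigned int mask = RV1126_DSI_TURNDISABLE |
			    RV1126_DSI_FORCERXMODE |
			    RV1126_DSI_FORCETXSTOPMODE;	/* 0xf5 */

	/* Clears bits 0, 2 and 7:4; all other bits keep their value
	 * because their write-enable bits in [31:16] stay 0. */
	printf("0x%08x\n", HIWORD_UPDATE(0u, mask));	/* 0x00f50000 */
	return 0;
}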

@@ -1,71 +0,0 @@
From 800f7c332df7cd9614c416fd005a6bb53f96f13c Mon Sep 17 00:00:00 2001
From: Jonas Karlman <jonas@kwiboo.se>
Date: Wed, 21 Jun 2023 22:33:18 +0000
Subject: [PATCH] drm/rockchip: vop: Use cleanup helper directly as destroy
funcs
vop_plane_destroy and vop_crtc_destroy are plain wrappers around
drm_plane_cleanup and drm_crtc_cleanup. Use them directly as plane and
crtc funcs to more closely match the VOP2 driver.
Signed-off-by: Jonas Karlman <jonas@kwiboo.se>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20230621223311.2239547-3-jonas@kwiboo.se
---
drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 16 +++-------------
1 file changed, 3 insertions(+), 13 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -773,11 +773,6 @@ out:
}
}
-static void vop_plane_destroy(struct drm_plane *plane)
-{
- drm_plane_cleanup(plane);
-}
-
static inline bool rockchip_afbc(u64 modifier)
{
return modifier == ROCKCHIP_AFBC_MOD;
@@ -1139,7 +1134,7 @@ static const struct drm_plane_helper_fun
static const struct drm_plane_funcs vop_plane_funcs = {
.update_plane = drm_atomic_helper_update_plane,
.disable_plane = drm_atomic_helper_disable_plane,
- .destroy = vop_plane_destroy,
+ .destroy = drm_plane_cleanup,
.reset = drm_atomic_helper_plane_reset,
.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
.atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
@@ -1614,11 +1609,6 @@ static const struct drm_crtc_helper_func
.atomic_disable = vop_crtc_atomic_disable,
};
-static void vop_crtc_destroy(struct drm_crtc *crtc)
-{
- drm_crtc_cleanup(crtc);
-}
-
static struct drm_crtc_state *vop_crtc_duplicate_state(struct drm_crtc *crtc)
{
struct rockchip_crtc_state *rockchip_state;
@@ -1726,7 +1716,7 @@ vop_crtc_verify_crc_source(struct drm_cr
static const struct drm_crtc_funcs vop_crtc_funcs = {
.set_config = drm_atomic_helper_set_config,
.page_flip = drm_atomic_helper_page_flip,
- .destroy = vop_crtc_destroy,
+ .destroy = drm_crtc_cleanup,
.reset = vop_crtc_reset,
.atomic_duplicate_state = vop_crtc_duplicate_state,
.atomic_destroy_state = vop_crtc_destroy_state,
@@ -1977,7 +1967,7 @@ static void vop_destroy_crtc(struct vop
*/
list_for_each_entry_safe(plane, tmp, &drm_dev->mode_config.plane_list,
head)
- vop_plane_destroy(plane);
+ drm_plane_cleanup(plane);
/*
* Destroy CRTC after vop_plane_destroy() since vop_disable_plane()

@@ -1,35 +0,0 @@
From eb23cffdd7f085149799e5eda12a9aff792cc34d Mon Sep 17 00:00:00 2001
From: Michael Tretter <m.tretter@pengutronix.de>
Date: Mon, 9 Oct 2023 12:37:53 +0200
Subject: [PATCH] drm/rockchip: vop2: Demote message in mod_supported to
drm_dbg_kms
Checking if a modifier is supported by a plane is normal behavior. It is
normal that a plane may not support certain modifiers. Failing the check
doesn't justify an error message in the kernel log and may mislead
users.
Demote the error message to drm_dbg_kms to only print the message if the
respective debug messages are enabled. This is similar to the behavior
in rockchip_drm_vop.c.
Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231009103753.830458-1-m.tretter@pengutronix.de
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -478,8 +478,8 @@ static bool rockchip_vop2_mod_supported(
return true;
if (!rockchip_afbc(plane, modifier)) {
- drm_err(vop2->drm, "Unsupported format modifier 0x%llx\n",
- modifier);
+ drm_dbg_kms(vop2->drm, "Unsupported format modifier 0x%llx\n",
+ modifier);
return false;
}
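
With the message demoted, it only appears when KMS debugging is enabled,
e.g. by booting with drm.debug=0x04 (DRM_UT_KMS) or writing that mask to
/sys/module/drm/parameters/debug.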

@@ -1,53 +0,0 @@
From 63a06c9fe30bf84d1ab6f07d0e408bd1d4ccaf85 Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Mon, 31 Jul 2023 20:53:04 +0800
Subject: [PATCH] drm/rockchip: remove redundant of_match_ptr
The driver depends on CONFIG_OF, so it is not necessary to use
of_match_ptr here.
Even for drivers that do not depend on CONFIG_OF, it's almost always
better to leave out the of_match_ptr(), since the only thing it can
possibly do is to save a few bytes of .text if a driver can be used both
with and without it. Hence we remove of_match_ptr.
Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20230731125304.87059-1-wangzhu9@huawei.com
---
drivers/gpu/drm/rockchip/cdn-dp-core.c | 2 +-
drivers/gpu/drm/rockchip/rockchip_lvds.c | 2 +-
drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c
@@ -1260,7 +1260,7 @@ struct platform_driver cdn_dp_driver = {
.driver = {
.name = "cdn-dp",
.owner = THIS_MODULE,
- .of_match_table = of_match_ptr(cdn_dp_dt_ids),
+ .of_match_table = cdn_dp_dt_ids,
.pm = &cdn_dp_pm_ops,
},
};
--- a/drivers/gpu/drm/rockchip/rockchip_lvds.c
+++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c
@@ -750,6 +750,6 @@ struct platform_driver rockchip_lvds_dri
.remove_new = rockchip_lvds_remove,
.driver = {
.name = "rockchip-lvds",
- .of_match_table = of_match_ptr(rockchip_lvds_dt_ids),
+ .of_match_table = rockchip_lvds_dt_ids,
},
};
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -274,6 +274,6 @@ struct platform_driver vop2_platform_dri
.remove_new = vop2_remove,
.driver = {
.name = "rockchip-vop2",
- .of_match_table = of_match_ptr(vop2_dt_match),
+ .of_match_table = vop2_dt_match,
},
};
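
For context, of_match_ptr() from <linux/of.h> is just a compile-time
switch, roughly:

#ifdef CONFIG_OF
#define of_match_ptr(_ptr)	(_ptr)
#else
#define of_match_ptr(_ptr)	NULL
#endif

Since these drivers are only built with CONFIG_OF enabled, the macro
always expanded to its argument, so removing it changes nothing in the
generated code.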

@@ -1,29 +0,0 @@
From 253a1d33e5cfdf62525f5d6ed2bf03acbadd1582 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 21 Apr 2023 16:13:03 +0800
Subject: [PATCH] drm/rockchip: dsi: Use
devm_platform_get_and_ioremap_resource()
Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20230421081303.122452-1-yang.lee@linux.alibaba.com
---
drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
@@ -1357,8 +1357,7 @@ static int dw_mipi_dsi_rockchip_probe(st
if (!dsi)
return -ENOMEM;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- dsi->base = devm_ioremap_resource(dev, res);
+ dsi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(dsi->base)) {
DRM_DEV_ERROR(dev, "Unable to get dsi registers\n");
return PTR_ERR(dsi->base);

@@ -1,54 +0,0 @@
From ac1c11c23fc51c1ba51a3ed586df40ffe6b1de35 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Fri, 13 Oct 2023 20:20:36 +0800
Subject: [PATCH] drm/rockchip: remove unused struct in vop2
These structs are undefined and unused.
Fixes: 604be85547ce ("drm/rockchip: Add VOP2 driver")
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231013122036.1594090-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 2 --
drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 3 ---
2 files changed, 5 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -159,7 +159,6 @@ struct vop2_video_port {
struct vop2 *vop2;
struct clk *dclk;
unsigned int id;
- const struct vop2_video_port_regs *regs;
const struct vop2_video_port_data *data;
struct completion dsp_hold_completion;
@@ -2337,7 +2336,6 @@ static int vop2_create_crtcs(struct vop2
vp = &vop2->vps[i];
vp->vop2 = vop2;
vp->id = vp_data->id;
- vp->regs = vp_data->regs;
vp->data = vp_data;
snprintf(dclk_name, sizeof(dclk_name), "dclk_vp%d", vp->id);
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
@@ -134,16 +134,13 @@ struct vop2_video_port_data {
u16 cubic_lut_len;
struct vop_rect max_output;
const u8 pre_scan_max_dly[4];
- const struct vop2_video_port_regs *regs;
unsigned int offset;
};
struct vop2_data {
u8 nr_vps;
- const struct vop2_ctrl *ctrl;
const struct vop2_win_data *win;
const struct vop2_video_port_data *vp;
- const struct vop_csc_table *csc_table;
struct vop_rect max_input;
struct vop_rect max_output;

@@ -1,36 +0,0 @@
From dc00748adcf03d754bf43035c668bc5b20fb6597 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Fri, 13 Oct 2023 20:20:51 +0800
Subject: [PATCH] drm/rockchip: remove NR_LAYERS macro on vop2
There are 8 layers on rk3588, so a fixed macro definition is
not appropriate.
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231013122051.1594164-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -2314,8 +2314,6 @@ static struct vop2_video_port *find_vp_w
return NULL;
}
-#define NR_LAYERS 6
-
static int vop2_create_crtcs(struct vop2 *vop2)
{
const struct vop2_data *vop2_data = vop2->data;
@@ -2434,7 +2432,7 @@ static int vop2_create_crtcs(struct vop2
struct vop2_video_port *vp = &vop2->vps[i];
if (vp->crtc.port)
- vp->nlayers = NR_LAYERS / nvps;
+ vp->nlayers = vop2_data->win_size / nvps;
}
return 0;
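
As a sanity check on the replacement expression: rk356x exposes 6
windows (2 Cluster, 2 Esmart, 2 Smart) across 3 video ports, so with
all ports active win_size / nvps = 6 / 3 = 2 layers per port, matching
what the old NR_LAYERS macro produced, while rk3588's 8 windows over 4
ports likewise give 2.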

@@ -1,57 +0,0 @@
From 45ad07c7053df0b67e13d8deb574920d11651fb2 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Wed, 18 Oct 2023 17:42:10 +0800
Subject: [PATCH] drm/rockchip: vop: fix format bpp calculation
We can't rely on cpp for the bpp calculation, as the cpp of
some formats (DRM_FORMAT_YUV420_8BIT/10BIT, etc.) is zero.
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018094210.2475771-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -281,6 +281,20 @@ static void vop2_win_disable(struct vop2
vop2_win_write(win, VOP2_WIN_CLUSTER_ENABLE, 0);
}
+static u32 vop2_get_bpp(const struct drm_format_info *format)
+{
+ switch (format->format) {
+ case DRM_FORMAT_YUV420_8BIT:
+ return 12;
+ case DRM_FORMAT_YUV420_10BIT:
+ return 15;
+ case DRM_FORMAT_VUY101010:
+ return 30;
+ default:
+ return drm_format_info_bpp(format, 0);
+ }
+}
+
static enum vop2_data_format vop2_convert_format(u32 format)
{
switch (format) {
@@ -491,7 +505,7 @@ static u32 vop2_afbc_transform_offset(st
{
struct drm_rect *src = &pstate->src;
struct drm_framebuffer *fb = pstate->fb;
- u32 bpp = fb->format->cpp[0] * 8;
+ u32 bpp = vop2_get_bpp(fb->format);
u32 vir_width = (fb->pitches[0] << 3) / bpp;
u32 width = drm_rect_width(src) >> 16;
u32 height = drm_rect_height(src) >> 16;
@@ -1091,7 +1105,7 @@ static void vop2_plane_atomic_update(str
struct drm_display_mode *adjusted_mode = &crtc->state->adjusted_mode;
struct vop2 *vop2 = win->vop2;
struct drm_framebuffer *fb = pstate->fb;
- u32 bpp = fb->format->cpp[0] * 8;
+ u32 bpp = vop2_get_bpp(fb->format);
u32 actual_w, actual_h, dsp_w, dsp_h;
u32 act_info, dsp_info;
u32 format;
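
The hard-coded values in vop2_get_bpp() above follow directly from the
chroma subsampling: one luma sample per pixel plus two chroma samples
shared by hsub x vsub pixels. A standalone sketch of that arithmetic
(illustrative helper, not driver code):

/* Effective bits per pixel of a YUV format from its sampling layout. */
static unsigned int yuv_bpp(unsigned int bits, unsigned int hsub,
			    unsigned int vsub)
{
	return bits + 2 * bits / (hsub * vsub);
}

/* yuv_bpp(8, 2, 2)  == 12 -> DRM_FORMAT_YUV420_8BIT
 * yuv_bpp(10, 2, 2) == 15 -> DRM_FORMAT_YUV420_10BIT
 * yuv_bpp(10, 1, 1) == 30 -> DRM_FORMAT_VUY101010 */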

@@ -1,89 +0,0 @@
From 01d5a75370a60c3a8d691347ae6ebb2a9f8dc44a Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Wed, 18 Oct 2023 17:42:39 +0800
Subject: [PATCH] drm/rockchip: vop2: remove the unsupported format of cluster
window
The cluster windows on vop2 don't support linear yuv
formats (NV12/16/24); they only support the afbc based yuv
formats (DRM_FORMAT_YUV420_8BIT/10BIT), which will be
added in the next patch.
Fixes: 604be85547ce ("drm/rockchip: Add VOP2 driver")
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018094239.2475851-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 24 +-------------------
drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 3 ---
2 files changed, 1 insertion(+), 26 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -341,10 +341,6 @@ static enum vop2_afbc_format vop2_conver
case DRM_FORMAT_RGB565:
case DRM_FORMAT_BGR565:
return VOP2_AFBC_FMT_RGB565;
- case DRM_FORMAT_NV12:
- return VOP2_AFBC_FMT_YUV420;
- case DRM_FORMAT_NV16:
- return VOP2_AFBC_FMT_YUV422;
default:
return VOP2_AFBC_FMT_INVALID;
}
@@ -365,25 +361,9 @@ static bool vop2_win_rb_swap(u32 format)
}
}
-static bool vop2_afbc_rb_swap(u32 format)
-{
- switch (format) {
- case DRM_FORMAT_NV24:
- return true;
- default:
- return false;
- }
-}
-
static bool vop2_afbc_uv_swap(u32 format)
{
- switch (format) {
- case DRM_FORMAT_NV12:
- case DRM_FORMAT_NV16:
- return true;
- default:
- return false;
- }
+ return false;
}
static bool vop2_win_uv_swap(u32 format)
@@ -1243,7 +1223,6 @@ static void vop2_plane_atomic_update(str
drm_err(vop2->drm, "vp%d %s stride[%d] not 64 pixel aligned\n",
vp->id, win->data->name, stride);
- rb_swap = vop2_afbc_rb_swap(fb->format->format);
uv_swap = vop2_afbc_uv_swap(fb->format->format);
/*
* This is a workaround for crazy IC design, Cluster
@@ -1260,7 +1239,6 @@ static void vop2_plane_atomic_update(str
if (vop2_cluster_window(win))
vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 1);
vop2_win_write(win, VOP2_WIN_AFBC_FORMAT, afbc_format);
- vop2_win_write(win, VOP2_WIN_AFBC_RB_SWAP, rb_swap);
vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap);
vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0);
vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0);
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -24,9 +24,6 @@ static const uint32_t formats_win_full_1
DRM_FORMAT_BGR888,
DRM_FORMAT_RGB565,
DRM_FORMAT_BGR565,
- DRM_FORMAT_NV12,
- DRM_FORMAT_NV16,
- DRM_FORMAT_NV24,
};
static const uint32_t formats_win_full_10bit_yuyv[] = {

@@ -1,162 +0,0 @@
From bfd8a5c228fa3bb97884f77529c09e8745da08b9 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Wed, 18 Oct 2023 17:43:18 +0800
Subject: [PATCH] drm/rockchip: vop2: Add more supported 10bit formats
Add the 10-bit RGB and AFBC-based YUV formats supported
by vop2.
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018094318.2476081-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 45 +++++++++++++++++++-
drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 22 +++++++---
2 files changed, 61 insertions(+), 6 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -298,6 +298,11 @@ static u32 vop2_get_bpp(const struct drm
static enum vop2_data_format vop2_convert_format(u32 format)
{
switch (format) {
+ case DRM_FORMAT_XRGB2101010:
+ case DRM_FORMAT_ARGB2101010:
+ case DRM_FORMAT_XBGR2101010:
+ case DRM_FORMAT_ABGR2101010:
+ return VOP2_FMT_XRGB101010;
case DRM_FORMAT_XRGB8888:
case DRM_FORMAT_ARGB8888:
case DRM_FORMAT_XBGR8888:
@@ -310,10 +315,19 @@ static enum vop2_data_format vop2_conver
case DRM_FORMAT_BGR565:
return VOP2_FMT_RGB565;
case DRM_FORMAT_NV12:
+ case DRM_FORMAT_NV21:
+ case DRM_FORMAT_YUV420_8BIT:
return VOP2_FMT_YUV420SP;
+ case DRM_FORMAT_NV15:
+ case DRM_FORMAT_YUV420_10BIT:
+ return VOP2_FMT_YUV420SP_10;
case DRM_FORMAT_NV16:
+ case DRM_FORMAT_NV61:
return VOP2_FMT_YUV422SP;
+ case DRM_FORMAT_Y210:
+ return VOP2_FMT_YUV422SP_10;
case DRM_FORMAT_NV24:
+ case DRM_FORMAT_NV42:
return VOP2_FMT_YUV444SP;
case DRM_FORMAT_YUYV:
case DRM_FORMAT_YVYU:
@@ -330,6 +344,11 @@ static enum vop2_data_format vop2_conver
static enum vop2_afbc_format vop2_convert_afbc_format(u32 format)
{
switch (format) {
+ case DRM_FORMAT_XRGB2101010:
+ case DRM_FORMAT_ARGB2101010:
+ case DRM_FORMAT_XBGR2101010:
+ case DRM_FORMAT_ABGR2101010:
+ return VOP2_AFBC_FMT_ARGB2101010;
case DRM_FORMAT_XRGB8888:
case DRM_FORMAT_ARGB8888:
case DRM_FORMAT_XBGR8888:
@@ -341,6 +360,17 @@ static enum vop2_afbc_format vop2_conver
case DRM_FORMAT_RGB565:
case DRM_FORMAT_BGR565:
return VOP2_AFBC_FMT_RGB565;
+ case DRM_FORMAT_YUV420_8BIT:
+ return VOP2_AFBC_FMT_YUV420;
+ case DRM_FORMAT_YUV420_10BIT:
+ return VOP2_AFBC_FMT_YUV420_10BIT;
+ case DRM_FORMAT_YVYU:
+ case DRM_FORMAT_YUYV:
+ case DRM_FORMAT_VYUY:
+ case DRM_FORMAT_UYVY:
+ return VOP2_AFBC_FMT_YUV422;
+ case DRM_FORMAT_Y210:
+ return VOP2_AFBC_FMT_YUV422_10BIT;
default:
return VOP2_AFBC_FMT_INVALID;
}
@@ -351,6 +381,8 @@ static enum vop2_afbc_format vop2_conver
static bool vop2_win_rb_swap(u32 format)
{
switch (format) {
+ case DRM_FORMAT_XBGR2101010:
+ case DRM_FORMAT_ABGR2101010:
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
case DRM_FORMAT_BGR888:
@@ -363,7 +395,15 @@ static bool vop2_win_rb_swap(u32 format)
static bool vop2_afbc_uv_swap(u32 format)
{
- return false;
+ switch (format) {
+ case DRM_FORMAT_YUYV:
+ case DRM_FORMAT_Y210:
+ case DRM_FORMAT_YUV420_8BIT:
+ case DRM_FORMAT_YUV420_10BIT:
+ return true;
+ default:
+ return false;
+ }
}
static bool vop2_win_uv_swap(u32 format)
@@ -372,6 +412,9 @@ static bool vop2_win_uv_swap(u32 format)
case DRM_FORMAT_NV12:
case DRM_FORMAT_NV16:
case DRM_FORMAT_NV24:
+ case DRM_FORMAT_NV15:
+ case DRM_FORMAT_YUYV:
+ case DRM_FORMAT_UYVY:
return true;
default:
return false;
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -16,6 +16,10 @@
#include "rockchip_drm_vop2.h"
static const uint32_t formats_win_full_10bit[] = {
+ DRM_FORMAT_XRGB2101010,
+ DRM_FORMAT_ARGB2101010,
+ DRM_FORMAT_XBGR2101010,
+ DRM_FORMAT_ABGR2101010,
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
DRM_FORMAT_XBGR8888,
@@ -24,6 +28,10 @@ static const uint32_t formats_win_full_1
DRM_FORMAT_BGR888,
DRM_FORMAT_RGB565,
DRM_FORMAT_BGR565,
+ DRM_FORMAT_YUV420_8BIT, /* yuv420_8bit non-Linear mode only */
+ DRM_FORMAT_YUV420_10BIT, /* yuv420_10bit non-Linear mode only */
+ DRM_FORMAT_YUYV, /* yuv422_8bit non-Linear mode only*/
+ DRM_FORMAT_Y210, /* yuv422_10bit non-Linear mode only */
};
static const uint32_t formats_win_full_10bit_yuyv[] = {
@@ -35,11 +43,15 @@ static const uint32_t formats_win_full_1
DRM_FORMAT_BGR888,
DRM_FORMAT_RGB565,
DRM_FORMAT_BGR565,
- DRM_FORMAT_NV12,
- DRM_FORMAT_NV16,
- DRM_FORMAT_NV24,
- DRM_FORMAT_YVYU,
- DRM_FORMAT_VYUY,
+ DRM_FORMAT_NV12, /* yuv420_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV21, /* yuv420_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */
+ DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV61, /* yuv422_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV42, /* yuv444_8bit linear mode, 2 plane */
+ DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */
+ DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */
};
static const uint32_t formats_win_lite[] = {

@@ -1,116 +0,0 @@
From 215737e37d07ade8952048339e37aec6c6f82223 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Wed, 18 Oct 2023 17:43:39 +0800
Subject: [PATCH] drm/rockchip: vop2: rename window formats to show window type
using them
formats_win_full_10bit is for the cluster windows;
formats_win_full_10bit_yuyv is for the rk356x esmart windows (the
rk3588 esmart windows will support more formats); formats_win_lite is
for the smart windows.
Renaming them after the window type that uses them makes the meaning
clearer.
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018094339.2476142-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 30 ++++++++++----------
1 file changed, 15 insertions(+), 15 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -15,7 +15,7 @@
#include "rockchip_drm_vop2.h"
-static const uint32_t formats_win_full_10bit[] = {
+static const uint32_t formats_cluster[] = {
DRM_FORMAT_XRGB2101010,
DRM_FORMAT_ARGB2101010,
DRM_FORMAT_XBGR2101010,
@@ -34,7 +34,7 @@ static const uint32_t formats_win_full_1
DRM_FORMAT_Y210, /* yuv422_10bit non-Linear mode only */
};
-static const uint32_t formats_win_full_10bit_yuyv[] = {
+static const uint32_t formats_rk356x_esmart[] = {
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
DRM_FORMAT_XBGR8888,
@@ -54,7 +54,7 @@ static const uint32_t formats_win_full_1
DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */
};
-static const uint32_t formats_win_lite[] = {
+static const uint32_t formats_smart[] = {
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
DRM_FORMAT_XBGR8888,
@@ -153,8 +153,8 @@ static const struct vop2_win_data rk3568
.name = "Smart0-win0",
.phys_id = ROCKCHIP_VOP2_SMART0,
.base = 0x1c00,
- .formats = formats_win_lite,
- .nformats = ARRAY_SIZE(formats_win_lite),
+ .formats = formats_smart,
+ .nformats = ARRAY_SIZE(formats_smart),
.format_modifiers = format_modifiers,
.layer_sel_id = 3,
.supported_rotations = DRM_MODE_REFLECT_Y,
@@ -165,8 +165,8 @@ static const struct vop2_win_data rk3568
}, {
.name = "Smart1-win0",
.phys_id = ROCKCHIP_VOP2_SMART1,
- .formats = formats_win_lite,
- .nformats = ARRAY_SIZE(formats_win_lite),
+ .formats = formats_smart,
+ .nformats = ARRAY_SIZE(formats_smart),
.format_modifiers = format_modifiers,
.base = 0x1e00,
.layer_sel_id = 7,
@@ -178,8 +178,8 @@ static const struct vop2_win_data rk3568
}, {
.name = "Esmart1-win0",
.phys_id = ROCKCHIP_VOP2_ESMART1,
- .formats = formats_win_full_10bit_yuyv,
- .nformats = ARRAY_SIZE(formats_win_full_10bit_yuyv),
+ .formats = formats_rk356x_esmart,
+ .nformats = ARRAY_SIZE(formats_rk356x_esmart),
.format_modifiers = format_modifiers,
.base = 0x1a00,
.layer_sel_id = 6,
@@ -191,8 +191,8 @@ static const struct vop2_win_data rk3568
}, {
.name = "Esmart0-win0",
.phys_id = ROCKCHIP_VOP2_ESMART0,
- .formats = formats_win_full_10bit_yuyv,
- .nformats = ARRAY_SIZE(formats_win_full_10bit_yuyv),
+ .formats = formats_rk356x_esmart,
+ .nformats = ARRAY_SIZE(formats_rk356x_esmart),
.format_modifiers = format_modifiers,
.base = 0x1800,
.layer_sel_id = 2,
@@ -205,8 +205,8 @@ static const struct vop2_win_data rk3568
.name = "Cluster0-win0",
.phys_id = ROCKCHIP_VOP2_CLUSTER0,
.base = 0x1000,
- .formats = formats_win_full_10bit,
- .nformats = ARRAY_SIZE(formats_win_full_10bit),
+ .formats = formats_cluster,
+ .nformats = ARRAY_SIZE(formats_cluster),
.format_modifiers = format_modifiers_afbc,
.layer_sel_id = 0,
.supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
@@ -220,8 +220,8 @@ static const struct vop2_win_data rk3568
.name = "Cluster1-win0",
.phys_id = ROCKCHIP_VOP2_CLUSTER1,
.base = 0x1200,
- .formats = formats_win_full_10bit,
- .nformats = ARRAY_SIZE(formats_win_full_10bit),
+ .formats = formats_cluster,
+ .nformats = ARRAY_SIZE(formats_cluster),
.format_modifiers = format_modifiers_afbc,
.layer_sel_id = 1,
.supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |

@@ -1,57 +0,0 @@
From 728c15b4b5f3369cbde73d5e0f14701ab370f985 Mon Sep 17 00:00:00 2001
From: Jonas Karlman <jonas@kwiboo.se>
Date: Mon, 23 Oct 2023 17:37:14 +0000
Subject: [PATCH] drm/fourcc: Add NV20 and NV30 YUV formats
DRM_FORMAT_NV20 and DRM_FORMAT_NV30 are the 2x1 subsampled and
non-subsampled variants of NV15, a 10-bit 2-plane YUV format that has no
padding between
components. Instead, luminance and chrominance samples are grouped into 4s
so that each group is packed into an integer number of bytes:
YYYY = UVUV = 4 * 10 bits = 40 bits = 5 bytes
The '20' and '30' suffix refers to the optimum effective bits per pixel
which is achieved when the total number of luminance samples is a multiple
of 4.
V2: Added NV30 format
Signed-off-by: Jonas Karlman <jonas@kwiboo.se>
Reviewed-by: Sandy Huang <hjc@rock-chips.com>
Reviewed-by: Christopher Obbard <chris.obbard@collabora.com>
Tested-by: Christopher Obbard <chris.obbard@collabora.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023173718.188102-2-jonas@kwiboo.se
---
drivers/gpu/drm/drm_fourcc.c | 8 ++++++++
include/uapi/drm/drm_fourcc.h | 2 ++
2 files changed, 10 insertions(+)
--- a/drivers/gpu/drm/drm_fourcc.c
+++ b/drivers/gpu/drm/drm_fourcc.c
@@ -299,6 +299,14 @@ const struct drm_format_info *__drm_form
.num_planes = 2, .char_per_block = { 5, 5, 0 },
.block_w = { 4, 2, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2,
.vsub = 2, .is_yuv = true },
+ { .format = DRM_FORMAT_NV20, .depth = 0,
+ .num_planes = 2, .char_per_block = { 5, 5, 0 },
+ .block_w = { 4, 2, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2,
+ .vsub = 1, .is_yuv = true },
+ { .format = DRM_FORMAT_NV30, .depth = 0,
+ .num_planes = 2, .char_per_block = { 5, 5, 0 },
+ .block_w = { 4, 2, 0 }, .block_h = { 1, 1, 0 }, .hsub = 1,
+ .vsub = 1, .is_yuv = true },
{ .format = DRM_FORMAT_Q410, .depth = 0,
.num_planes = 3, .char_per_block = { 2, 2, 2 },
.block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 1,
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -323,6 +323,8 @@ extern "C" {
* index 1 = Cr:Cb plane, [39:0] Cr1:Cb1:Cr0:Cb0 little endian
*/
#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') /* 2x2 subsampled Cr:Cb plane */
+#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') /* 2x1 subsampled Cr:Cb plane */
+#define DRM_FORMAT_NV30 fourcc_code('N', 'V', '3', '0') /* non-subsampled Cr:Cb plane */
/*
* 2 plane YCbCr MSB aligned
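
The 4-samples-into-5-bytes grouping described above maps directly onto
the char_per_block = 5 / block_w = 4 table entries: the minimum luma
pitch is ceil(width / 4) * 5 bytes, which is what
drm_format_info_min_pitch() computes from the table. A standalone
sketch (hypothetical helper name):

/* Minimum plane-0 pitch for the NV15/NV20/NV30 packing: each group of
 * four 10-bit samples occupies exactly 5 bytes, with no padding. */
static unsigned int nvx_min_pitch(unsigned int width)
{
	return ((width + 3) / 4) * 5;
}

/* e.g. nvx_min_pitch(1920) == 2400 bytes == 1920 * 10 bits / 8. */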

@@ -1,231 +0,0 @@
From d4b384228562848e4b76b608a5876c92160e993c Mon Sep 17 00:00:00 2001
From: Jonas Karlman <jonas@kwiboo.se>
Date: Mon, 23 Oct 2023 17:37:15 +0000
Subject: [PATCH] drm/rockchip: vop: Add NV15, NV20 and NV30 support
Add support for displaying 10-bit 4:2:0 and 4:2:2 formats produced by
the Rockchip Video Decoder on RK322X, RK3288, RK3328 and RK3399.
Also add support for 10-bit 4:4:4 format while at it.
V5: Use drm_format_info_min_pitch() for correct bpp
Add missing NV21, NV61 and NV42 formats
V4: Rework RK3328/RK3399 win0/1 data to not affect RK3368
V2: Added NV30 support
Signed-off-by: Jonas Karlman <jonas@kwiboo.se>
Reviewed-by: Sandy Huang <hjc@rock-chips.com>
Reviewed-by: Christopher Obbard <chris.obbard@collabora.com>
Tested-by: Christopher Obbard <chris.obbard@collabora.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023173718.188102-3-jonas@kwiboo.se
---
drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 36 ++++++++---
drivers/gpu/drm/rockchip/rockchip_drm_vop.h | 1 +
drivers/gpu/drm/rockchip/rockchip_vop_reg.c | 66 +++++++++++++++++----
3 files changed, 86 insertions(+), 17 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -280,6 +280,18 @@ static bool has_uv_swapped(uint32_t form
}
}
+static bool is_fmt_10(uint32_t format)
+{
+ switch (format) {
+ case DRM_FORMAT_NV15:
+ case DRM_FORMAT_NV20:
+ case DRM_FORMAT_NV30:
+ return true;
+ default:
+ return false;
+ }
+}
+
static enum vop_data_format vop_convert_format(uint32_t format)
{
switch (format) {
@@ -295,12 +307,15 @@ static enum vop_data_format vop_convert_
case DRM_FORMAT_BGR565:
return VOP_FMT_RGB565;
case DRM_FORMAT_NV12:
+ case DRM_FORMAT_NV15:
case DRM_FORMAT_NV21:
return VOP_FMT_YUV420SP;
case DRM_FORMAT_NV16:
+ case DRM_FORMAT_NV20:
case DRM_FORMAT_NV61:
return VOP_FMT_YUV422SP;
case DRM_FORMAT_NV24:
+ case DRM_FORMAT_NV30:
case DRM_FORMAT_NV42:
return VOP_FMT_YUV444SP;
default:
@@ -947,7 +962,12 @@ static void vop_plane_atomic_update(stru
dsp_sty = dest->y1 + crtc->mode.vtotal - crtc->mode.vsync_start;
dsp_st = dsp_sty << 16 | (dsp_stx & 0xffff);
- offset = (src->x1 >> 16) * fb->format->cpp[0];
+ if (fb->format->char_per_block[0])
+ offset = drm_format_info_min_pitch(fb->format, 0,
+ src->x1 >> 16);
+ else
+ offset = (src->x1 >> 16) * fb->format->cpp[0];
+
offset += (src->y1 >> 16) * fb->pitches[0];
dma_addr = rk_obj->dma_addr + offset + fb->offsets[0];
@@ -973,6 +993,7 @@ static void vop_plane_atomic_update(stru
}
VOP_WIN_SET(vop, win, format, format);
+ VOP_WIN_SET(vop, win, fmt_10, is_fmt_10(fb->format->format));
VOP_WIN_SET(vop, win, yrgb_vir, DIV_ROUND_UP(fb->pitches[0], 4));
VOP_WIN_SET(vop, win, yrgb_mst, dma_addr);
VOP_WIN_YUV2YUV_SET(vop, win_yuv2yuv, y2r_en, is_yuv);
@@ -982,15 +1003,16 @@ static void vop_plane_atomic_update(stru
(new_state->rotation & DRM_MODE_REFLECT_X) ? 1 : 0);
if (is_yuv) {
- int hsub = fb->format->hsub;
- int vsub = fb->format->vsub;
- int bpp = fb->format->cpp[1];
-
uv_obj = fb->obj[1];
rk_uv_obj = to_rockchip_obj(uv_obj);
- offset = (src->x1 >> 16) * bpp / hsub;
- offset += (src->y1 >> 16) * fb->pitches[1] / vsub;
+ if (fb->format->char_per_block[1])
+ offset = drm_format_info_min_pitch(fb->format, 1,
+ src->x1 >> 16);
+ else
+ offset = (src->x1 >> 16) * fb->format->cpp[1];
+ offset /= fb->format->hsub;
+ offset += (src->y1 >> 16) * fb->pitches[1] / fb->format->vsub;
dma_addr = rk_uv_obj->dma_addr + offset + fb->offsets[1];
VOP_WIN_SET(vop, win, uv_vir, DIV_ROUND_UP(fb->pitches[1], 4));
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
@@ -187,6 +187,7 @@ struct vop_win_phy {
struct vop_reg enable;
struct vop_reg gate;
struct vop_reg format;
+ struct vop_reg fmt_10;
struct vop_reg rb_swap;
struct vop_reg uv_swap;
struct vop_reg act_info;
--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
@@ -53,6 +53,26 @@ static const uint32_t formats_win_full[]
DRM_FORMAT_NV42,
};
+static const uint32_t formats_win_full_10[] = {
+ DRM_FORMAT_XRGB8888,
+ DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_ABGR8888,
+ DRM_FORMAT_RGB888,
+ DRM_FORMAT_BGR888,
+ DRM_FORMAT_RGB565,
+ DRM_FORMAT_BGR565,
+ DRM_FORMAT_NV12,
+ DRM_FORMAT_NV21,
+ DRM_FORMAT_NV16,
+ DRM_FORMAT_NV61,
+ DRM_FORMAT_NV24,
+ DRM_FORMAT_NV42,
+ DRM_FORMAT_NV15,
+ DRM_FORMAT_NV20,
+ DRM_FORMAT_NV30,
+};
+
static const uint64_t format_modifiers_win_full[] = {
DRM_FORMAT_MOD_LINEAR,
DRM_FORMAT_MOD_INVALID,
@@ -629,11 +649,12 @@ static const struct vop_scl_regs rk3288_
static const struct vop_win_phy rk3288_win01_data = {
.scl = &rk3288_win_full_scl,
- .data_formats = formats_win_full,
- .nformats = ARRAY_SIZE(formats_win_full),
+ .data_formats = formats_win_full_10,
+ .nformats = ARRAY_SIZE(formats_win_full_10),
.format_modifiers = format_modifiers_win_full,
.enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0),
.format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1),
+ .fmt_10 = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 4),
.rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12),
.uv_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 15),
.act_info = VOP_REG(RK3288_WIN0_ACT_INFO, 0x1fff1fff, 0),
@@ -938,13 +959,38 @@ static const struct vop_win_yuv2yuv_data
};
-static const struct vop_win_phy rk3399_win01_data = {
+static const struct vop_win_phy rk3399_win0_data = {
.scl = &rk3288_win_full_scl,
- .data_formats = formats_win_full,
- .nformats = ARRAY_SIZE(formats_win_full),
+ .data_formats = formats_win_full_10,
+ .nformats = ARRAY_SIZE(formats_win_full_10),
.format_modifiers = format_modifiers_win_full_afbc,
.enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0),
.format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1),
+ .fmt_10 = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 4),
+ .rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12),
+ .uv_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 15),
+ .x_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 21),
+ .y_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 22),
+ .act_info = VOP_REG(RK3288_WIN0_ACT_INFO, 0x1fff1fff, 0),
+ .dsp_info = VOP_REG(RK3288_WIN0_DSP_INFO, 0x0fff0fff, 0),
+ .dsp_st = VOP_REG(RK3288_WIN0_DSP_ST, 0x1fff1fff, 0),
+ .yrgb_mst = VOP_REG(RK3288_WIN0_YRGB_MST, 0xffffffff, 0),
+ .uv_mst = VOP_REG(RK3288_WIN0_CBR_MST, 0xffffffff, 0),
+ .yrgb_vir = VOP_REG(RK3288_WIN0_VIR, 0x3fff, 0),
+ .uv_vir = VOP_REG(RK3288_WIN0_VIR, 0x3fff, 16),
+ .src_alpha_ctl = VOP_REG(RK3288_WIN0_SRC_ALPHA_CTRL, 0xff, 0),
+ .dst_alpha_ctl = VOP_REG(RK3288_WIN0_DST_ALPHA_CTRL, 0xff, 0),
+ .channel = VOP_REG(RK3288_WIN0_CTRL2, 0xff, 0),
+};
+
+static const struct vop_win_phy rk3399_win1_data = {
+ .scl = &rk3288_win_full_scl,
+ .data_formats = formats_win_full_10,
+ .nformats = ARRAY_SIZE(formats_win_full_10),
+ .format_modifiers = format_modifiers_win_full,
+ .enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0),
+ .format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1),
+ .fmt_10 = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 4),
.rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12),
.uv_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 15),
.x_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 21),
@@ -967,9 +1013,9 @@ static const struct vop_win_phy rk3399_w
* AFBC on the primary plane.
*/
static const struct vop_win_data rk3399_vop_win_data[] = {
- { .base = 0x00, .phy = &rk3399_win01_data,
+ { .base = 0x00, .phy = &rk3399_win0_data,
.type = DRM_PLANE_TYPE_PRIMARY },
- { .base = 0x40, .phy = &rk3368_win01_data,
+ { .base = 0x40, .phy = &rk3399_win1_data,
.type = DRM_PLANE_TYPE_OVERLAY },
{ .base = 0x00, .phy = &rk3368_win23_data,
.type = DRM_PLANE_TYPE_OVERLAY },
@@ -1101,11 +1147,11 @@ static const struct vop_intr rk3328_vop_
};
static const struct vop_win_data rk3328_vop_win_data[] = {
- { .base = 0xd0, .phy = &rk3368_win01_data,
+ { .base = 0xd0, .phy = &rk3399_win1_data,
.type = DRM_PLANE_TYPE_PRIMARY },
- { .base = 0x1d0, .phy = &rk3368_win01_data,
+ { .base = 0x1d0, .phy = &rk3399_win1_data,
.type = DRM_PLANE_TYPE_OVERLAY },
- { .base = 0x2d0, .phy = &rk3368_win01_data,
+ { .base = 0x2d0, .phy = &rk3399_win1_data,
.type = DRM_PLANE_TYPE_CURSOR },
};

@@ -1,67 +0,0 @@
From 5fc6aa7db080fd90ef00846aac04e8a211088132 Mon Sep 17 00:00:00 2001
From: Jonas Karlman <jonas@kwiboo.se>
Date: Wed, 25 Oct 2023 21:32:46 +0000
Subject: [PATCH] drm/rockchip: vop2: Add NV20 and NV30 support
Add support for the 10-bit 4:2:2 and 4:4:4 formats NV20 and NV30.
These formats can be tested using modetest [1]:
modetest -P <plane_id>@<crtc_id>:1920x1080@<format>
e.g. on a ROCK 3 Model A (rk3568):
modetest -P 43@67:1920x1080@NV20 -F tiles,tiles
modetest -P 43@67:1920x1080@NV30 -F smpte,smpte
[1] https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/329
Signed-off-by: Jonas Karlman <jonas@kwiboo.se>
Reviewed-by: Christopher Obbard <chris.obbard@collabora.com>
Tested-by: Christopher Obbard <chris.obbard@collabora.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231025213248.2641962-1-jonas@kwiboo.se
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 5 +++++
drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 2 ++
2 files changed, 7 insertions(+)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -324,11 +324,14 @@ static enum vop2_data_format vop2_conver
case DRM_FORMAT_NV16:
case DRM_FORMAT_NV61:
return VOP2_FMT_YUV422SP;
+ case DRM_FORMAT_NV20:
case DRM_FORMAT_Y210:
return VOP2_FMT_YUV422SP_10;
case DRM_FORMAT_NV24:
case DRM_FORMAT_NV42:
return VOP2_FMT_YUV444SP;
+ case DRM_FORMAT_NV30:
+ return VOP2_FMT_YUV444SP_10;
case DRM_FORMAT_YUYV:
case DRM_FORMAT_YVYU:
return VOP2_FMT_VYUY422;
@@ -413,6 +416,8 @@ static bool vop2_win_uv_swap(u32 format)
case DRM_FORMAT_NV16:
case DRM_FORMAT_NV24:
case DRM_FORMAT_NV15:
+ case DRM_FORMAT_NV20:
+ case DRM_FORMAT_NV30:
case DRM_FORMAT_YUYV:
case DRM_FORMAT_UYVY:
return true;
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -48,8 +48,10 @@ static const uint32_t formats_rk356x_esm
DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */
DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */
DRM_FORMAT_NV61, /* yuv422_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV20, /* yuv422_10bit linear mode, 2 plane, no padding */
DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */
DRM_FORMAT_NV42, /* yuv444_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV30, /* yuv444_10bit linear mode, 2 plane, no padding */
DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */
DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */
};

@@ -1,40 +0,0 @@
From 1044f4a31734eef000f42cdaaf35bb2f76286be5 Mon Sep 17 00:00:00 2001
From: Johan Jonker <jbx6244@gmail.com>
Date: Thu, 2 Nov 2023 14:41:48 +0100
Subject: [PATCH] drm/rockchip: rk3066_hdmi: Remove useless mode_fixup
The mode_fixup implementation doesn't do anything, so we can simply
remove it.
Signed-off-by: Johan Jonker <jbx6244@gmail.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/5649ac03-db92-42a9-d86a-76dfa1af7c64@gmail.com
---
drivers/gpu/drm/rockchip/rk3066_hdmi.c | 9 ---------
1 file changed, 9 deletions(-)
--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
@@ -433,14 +433,6 @@ static void rk3066_hdmi_encoder_disable(
rk3066_hdmi_set_power_mode(hdmi, HDMI_SYS_POWER_MODE_A);
}
-static bool
-rk3066_hdmi_encoder_mode_fixup(struct drm_encoder *encoder,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adj_mode)
-{
- return true;
-}
-
static int
rk3066_hdmi_encoder_atomic_check(struct drm_encoder *encoder,
struct drm_crtc_state *crtc_state,
@@ -458,7 +450,6 @@ static const
struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = {
.enable = rk3066_hdmi_encoder_enable,
.disable = rk3066_hdmi_encoder_disable,
- .mode_fixup = rk3066_hdmi_encoder_mode_fixup,
.mode_set = rk3066_hdmi_encoder_mode_set,
.atomic_check = rk3066_hdmi_encoder_atomic_check,
};

@@ -1,88 +0,0 @@
From ae3436a5e7c2ef4f92938133bd99f92fc47ea34e Mon Sep 17 00:00:00 2001
From: Johan Jonker <jbx6244@gmail.com>
Date: Thu, 2 Nov 2023 14:42:04 +0100
Subject: [PATCH] drm/rockchip: rk3066_hdmi: Switch encoder hooks to atomic
The rk3066_hdmi encoder still uses the non-atomic variants
of enable and disable. Convert them to their atomic equivalents.
In atomic mode there is no need to save the adjusted mode,
so remove the mode_set function.
Signed-off-by: Johan Jonker <jbx6244@gmail.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/034c3446-d619-f4c3-3aaa-ab51dc19d07f@gmail.com
---
drivers/gpu/drm/rockchip/rk3066_hdmi.c | 35 +++++++++++++-------------
1 file changed, 17 insertions(+), 18 deletions(-)
--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
@@ -54,7 +54,6 @@ struct rk3066_hdmi {
unsigned int tmdsclk;
struct hdmi_data_info hdmi_data;
- struct drm_display_mode previous_mode;
};
static struct rk3066_hdmi *encoder_to_rk3066_hdmi(struct drm_encoder *encoder)
@@ -386,21 +385,21 @@ static int rk3066_hdmi_setup(struct rk30
return 0;
}
-static void
-rk3066_hdmi_encoder_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adj_mode)
+static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder,
+ struct drm_atomic_state *state)
{
struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder);
+ struct drm_connector_state *conn_state;
+ struct drm_crtc_state *crtc_state;
+ int mux, val;
- /* Store the display mode for plugin/DPMS poweron events. */
- drm_mode_copy(&hdmi->previous_mode, adj_mode);
-}
+ conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector);
+ if (WARN_ON(!conn_state))
+ return;
-static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder)
-{
- struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder);
- int mux, val;
+ crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
+ if (WARN_ON(!crtc_state))
+ return;
mux = drm_of_encoder_active_endpoint_id(hdmi->dev->of_node, encoder);
if (mux)
@@ -413,10 +412,11 @@ static void rk3066_hdmi_encoder_enable(s
DRM_DEV_DEBUG(hdmi->dev, "hdmi encoder enable select: vop%s\n",
(mux) ? "1" : "0");
- rk3066_hdmi_setup(hdmi, &hdmi->previous_mode);
+ rk3066_hdmi_setup(hdmi, &crtc_state->adjusted_mode);
}
-static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder)
+static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder,
+ struct drm_atomic_state *state)
{
struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder);
@@ -448,10 +448,9 @@ rk3066_hdmi_encoder_atomic_check(struct
static const
struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = {
- .enable = rk3066_hdmi_encoder_enable,
- .disable = rk3066_hdmi_encoder_disable,
- .mode_set = rk3066_hdmi_encoder_mode_set,
- .atomic_check = rk3066_hdmi_encoder_atomic_check,
+ .atomic_check = rk3066_hdmi_encoder_atomic_check,
+ .atomic_enable = rk3066_hdmi_encoder_enable,
+ .atomic_disable = rk3066_hdmi_encoder_disable,
};
static enum drm_connector_status

@@ -1,43 +0,0 @@
From f4814c20d14ca168382e8887c768f290e4a2a861 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 22 Nov 2023 23:18:29 +0100
Subject: [PATCH] drm/rockchip: rk3066_hdmi: include drm/drm_atomic.h
Without this header, the newly added code fails to build:
drivers/gpu/drm/rockchip/rk3066_hdmi.c: In function 'rk3066_hdmi_encoder_enable':
drivers/gpu/drm/rockchip/rk3066_hdmi.c:397:22: error: implicit declaration of function 'drm_atomic_get_new_connector_state'; did you mean 'drm_atomic_helper_connector_reset'? [-Werror=implicit-function-declaration]
397 | conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| drm_atomic_helper_connector_reset
drivers/gpu/drm/rockchip/rk3066_hdmi.c:397:20: error: assignment to 'struct drm_connector_state *' from 'int' makes pointer from integer without a cast [-Werror=int-conversion]
397 | conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector);
| ^
drivers/gpu/drm/rockchip/rk3066_hdmi.c:401:22: error: implicit declaration of function 'drm_atomic_get_new_crtc_state'; did you mean 'drm_atomic_helper_swap_state'? [-Werror=implicit-function-declaration]
401 | crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| drm_atomic_helper_swap_state
drivers/gpu/drm/rockchip/rk3066_hdmi.c:401:20: error: assignment to 'struct drm_crtc_state *' from 'int' makes pointer from integer without a cast [-Werror=int-conversion]
401 | crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
| ^
Fixes: ae3436a5e7c2 ("drm/rockchip: rk3066_hdmi: Switch encoder hooks to atomic")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122221838.3164349-1-arnd@kernel.org
---
drivers/gpu/drm/rockchip/rk3066_hdmi.c | 1 +
1 file changed, 1 insertion(+)
--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
@@ -4,6 +4,7 @@
* Zheng Yang <zhengyang@rock-chips.com>
*/
+#include <drm/drm_atomic.h>
#include <drm/drm_edid.h>
#include <drm/drm_of.h>
#include <drm/drm_probe_helper.h>

@@ -1,60 +0,0 @@
From 81a06f1d02e588cfa14c5e5953d9dc50b1d404be Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:57:19 +0800
Subject: [PATCH] Revert "drm/rockchip: vop2: Use regcache_sync() to fix
suspend/resume"
This reverts commit b63a553e8f5aa6574eeb535a551817a93c426d8c.
regcache_sync() will try to reload the configuration in the regcache to
the hardware, but the registers of the 4 Cluster windows and Esmart1/2/3
on the upcoming rk3588 cannot be set successfully before the internal
power domain (PD) is powered on.
Also it's better to keep the hardware registers as they are before we
really enable them.
So let's revert this version, and keep the first version:
commit afa965a45e01 ("drm/rockchip: vop2: fix suspend/resume")
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115719.1784834-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -216,6 +216,8 @@ struct vop2 {
struct vop2_win win[];
};
+static const struct regmap_config vop2_regmap_config;
+
static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc)
{
return container_of(crtc, struct vop2_video_port, crtc);
@@ -894,7 +896,11 @@ static void vop2_enable(struct vop2 *vop
return;
}
- regcache_sync(vop2->map);
+ ret = regmap_reinit_cache(vop2->map, &vop2_regmap_config);
+ if (ret) {
+ drm_err(vop2->drm, "failed to reinit cache: %d\n", ret);
+ return;
+ }
if (vop2->data->soc_id == 3566)
vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
@@ -924,8 +930,6 @@ static void vop2_disable(struct vop2 *vo
pm_runtime_put_sync(vop2->dev);
- regcache_mark_dirty(vop2->map);
-
clk_disable_unprepare(vop2->aclk);
clk_disable_unprepare(vop2->hclk);
}
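
The two APIs contrasted by this revert behave quite differently on
resume: regcache_sync() (paired with regcache_mark_dirty() at disable
time) replays cached register values into the hardware, while
regmap_reinit_cache() discards the cache so it matches the hardware's
reset state without writing any register. A sketch of the two
strategies (hypothetical function names, error handling elided):

#include <linux/regmap.h>

/* Strategy a: restore pre-suspend state by replaying the cache. */
static int resume_replay(struct regmap *map)
{
	return regcache_sync(map);	/* writes cached values back */
}

/* Strategy b: forget the old state; hardware registers are untouched. */
static int resume_forget(struct regmap *map, const struct regmap_config *cfg)
{
	return regmap_reinit_cache(map, cfg);
}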

@@ -1,83 +0,0 @@
From bebad6bd4fbdc448ad3b337ad281b813e68f6f53 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:57:30 +0800
Subject: [PATCH] drm/rockchip: vop2: set half_block_en bit in all mode
At first we thought the half_block_en bit in the AFBCD_CTRL register
only worked in afbc mode. But the fact is that it controls the line
buffer in all modes (afbc/tile/linear), so we need to configure it in
all cases.
As the cluster windows of rk3568 only support the afbc format, they are
therefore not affected.
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115730.1784893-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 25 ++++++++++++++------
1 file changed, 18 insertions(+), 7 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -530,6 +530,18 @@ static bool rockchip_vop2_mod_supported(
return vop2_convert_afbc_format(format) >= 0;
}
+/*
+ * 0: Full mode, 16 lines for one tail
+ * 1: half block mode, 8 lines one tail
+ */
+static bool vop2_half_block_enable(struct drm_plane_state *pstate)
+{
+ if (pstate->rotation & (DRM_MODE_ROTATE_270 | DRM_MODE_ROTATE_90))
+ return false;
+ else
+ return true;
+}
+
static u32 vop2_afbc_transform_offset(struct drm_plane_state *pstate,
bool afbc_half_block_en)
{
@@ -1155,6 +1167,7 @@ static void vop2_plane_atomic_update(str
bool rotate_90 = pstate->rotation & DRM_MODE_ROTATE_90;
struct rockchip_gem_object *rk_obj;
unsigned long offset;
+ bool half_block_en;
bool afbc_en;
dma_addr_t yrgb_mst;
dma_addr_t uv_mst;
@@ -1247,6 +1260,7 @@ static void vop2_plane_atomic_update(str
dsp_info = (dsp_h - 1) << 16 | ((dsp_w - 1) & 0xffff);
format = vop2_convert_format(fb->format->format);
+ half_block_en = vop2_half_block_enable(pstate);
drm_dbg(vop2->drm, "vp%d update %s[%dx%d->%dx%d@%dx%d] fmt[%p4cc_%s] addr[%pad]\n",
vp->id, win->data->name, actual_w, actual_h, dsp_w, dsp_h,
@@ -1254,6 +1268,9 @@ static void vop2_plane_atomic_update(str
&fb->format->format,
afbc_en ? "AFBC" : "", &yrgb_mst);
+ if (vop2_cluster_window(win))
+ vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, half_block_en);
+
if (afbc_en) {
u32 stride;
@@ -1294,13 +1311,7 @@ static void vop2_plane_atomic_update(str
vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap);
vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0);
vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0);
- if (pstate->rotation & (DRM_MODE_ROTATE_270 | DRM_MODE_ROTATE_90)) {
- vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, 0);
- transform_offset = vop2_afbc_transform_offset(pstate, false);
- } else {
- vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, 1);
- transform_offset = vop2_afbc_transform_offset(pstate, true);
- }
+ transform_offset = vop2_afbc_transform_offset(pstate, half_block_en);
vop2_win_write(win, VOP2_WIN_AFBC_HDR_PTR, yrgb_mst);
vop2_win_write(win, VOP2_WIN_AFBC_PIC_SIZE, act_info);
vop2_win_write(win, VOP2_WIN_AFBC_TRANSFORM_OFFSET, transform_offset);

@@ -1,50 +0,0 @@
From d1f8face0fc1298c88ef4a0479c3027b46ca2c77 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:57:52 +0800
Subject: [PATCH] drm/rockchip: vop2: Add write mask for VP config done
The write mask bits are used to make sure that writing the config done
bit of one VP does not overwrite the bits of the others.
Unfortunately, the write mask bits are missing on rk3566/8, which means
writes to them take no effect there.
We need this to make the vop work properly on the variants that follow
rk3566/8.
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115752.1785013-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -267,12 +267,23 @@ static bool vop2_cluster_window(const st
return win->data->feature & WIN_FEATURE_CLUSTER;
}
+/*
+ * Note:
+ * The write mask function is documented but missing on rk3566/8, writes
+ * to these bits have no effect. For newer soc(rk3588 and following) the
+ * write mask is needed for register writes.
+ *
+ * GLB_CFG_DONE_EN has no write mask bit.
+ *
+ */
static void vop2_cfg_done(struct vop2_video_port *vp)
{
struct vop2 *vop2 = vp->vop2;
+ u32 val = RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN;
- regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE,
- BIT(vp->id) | RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN);
+ val |= BIT(vp->id) | (BIT(vp->id) << 16);
+
+ regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE, val);
}
static void vop2_win_disable(struct vop2_win *win)
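
As a worked example of the new value: for vp->id == 1 the write becomes
GLB_CFG_DONE_EN | BIT(1) | BIT(17), where BIT(17) is the write-enable
bit for BIT(1), so the config-done bits of the other VPs are left
untouched on SoCs that implement the mask.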


@@ -1,50 +0,0 @@
From c408af1afc4b74ea6df69e0313be97f1f83e981a Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:58:26 +0800
Subject: [PATCH] drm/rockchip: vop2: rename grf to sys_grf
The vop2 needs to reference more GRFs (system grf, vop grf, vo0/1 grf, etc.)
on the upcoming rk3588.
So rename the current system grf to sys_grf.
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115826.1785190-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -189,7 +189,7 @@ struct vop2 {
void __iomem *regs;
struct regmap *map;
- struct regmap *grf;
+ struct regmap *sys_grf;
/* physical map length of vop2 register */
u32 len;
@@ -1535,9 +1535,9 @@ static void rk3568_set_intf_mux(struct v
dip &= ~RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL;
dip |= FIELD_PREP(RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL, polflags);
if (polflags & POLFLAG_DCLK_INV)
- regmap_write(vop2->grf, RK3568_GRF_VO_CON1, BIT(3 + 16) | BIT(3));
+ regmap_write(vop2->sys_grf, RK3568_GRF_VO_CON1, BIT(3 + 16) | BIT(3));
else
- regmap_write(vop2->grf, RK3568_GRF_VO_CON1, BIT(3 + 16));
+ regmap_write(vop2->sys_grf, RK3568_GRF_VO_CON1, BIT(3 + 16));
break;
case ROCKCHIP_VOP2_EP_HDMI0:
die &= ~RK3568_SYS_DSP_INFACE_EN_HDMI_MUX;
@@ -2821,7 +2821,7 @@ static int vop2_bind(struct device *dev,
return PTR_ERR(vop2->lut_regs);
}
- vop2->grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf");
+ vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf");
vop2->hclk = devm_clk_get(vop2->dev, "hclk");
if (IS_ERR(vop2->hclk)) {


@@ -1,28 +0,0 @@
From dc7226acacc6502291446f9e33cf96246ec49a30 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:59:07 +0800
Subject: [PATCH] dt-bindings: rockchip,vop2: Add more endpoint definition
There are 2 HDMI, 2 DP and 2 eDP ports on rk3588, so add
the corresponding endpoint definitions for them.
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115907.1785377-1-andyshrk@163.com
---
include/dt-bindings/soc/rockchip,vop2.h | 4 ++++
1 file changed, 4 insertions(+)
--- a/include/dt-bindings/soc/rockchip,vop2.h
+++ b/include/dt-bindings/soc/rockchip,vop2.h
@@ -10,5 +10,9 @@
#define ROCKCHIP_VOP2_EP_LVDS0 5
#define ROCKCHIP_VOP2_EP_MIPI1 6
#define ROCKCHIP_VOP2_EP_LVDS1 7
+#define ROCKCHIP_VOP2_EP_HDMI1 8
+#define ROCKCHIP_VOP2_EP_EDP1 9
+#define ROCKCHIP_VOP2_EP_DP0 10
+#define ROCKCHIP_VOP2_EP_DP1 11
#endif /* __DT_BINDINGS_ROCKCHIP_VOP2_H */
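
These endpoint IDs are consumed by the rk3588 support patch further below, which classifies an endpoint before programming the interface mux. A hedged sketch of that style of usage (mirroring the vop2_output_if_is_*() macros added later in this series):

#include <dt-bindings/soc/rockchip,vop2.h>

static bool vop2_ep_is_dp(int ep_id)
{
	return ep_id == ROCKCHIP_VOP2_EP_DP0 ||
	       ep_id == ROCKCHIP_VOP2_EP_DP1;
}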


@@ -1,997 +0,0 @@
From 5a028e8f062fc862f051f8e62a0d5a1abac91955 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:59:19 +0800
Subject: [PATCH] drm/rockchip: vop2: Add support for rk3588
VOP2 on rk3588:
Four video ports:
VP0 Max 4096x2160
VP1 Max 4096x2160
VP2 Max 4096x2160
VP3 Max 2048x1080
4 4K Cluster windows with AFBC/line RGB and AFBC-only YUV support
4 4K Esmart windows with line RGB/YUV support
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115919.1785435-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 400 ++++++++++++++++++-
drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 81 ++++
drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 221 ++++++++++
3 files changed, 696 insertions(+), 6 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -190,6 +190,9 @@ struct vop2 {
struct regmap *map;
struct regmap *sys_grf;
+ struct regmap *vop_grf;
+ struct regmap *vo1_grf;
+ struct regmap *sys_pmu;
/* physical map length of vop2 register */
u32 len;
@@ -208,6 +211,7 @@ struct vop2 {
unsigned int enable_count;
struct clk *hclk;
struct clk *aclk;
+ struct clk *pclk;
/* optional internal rgb encoder */
struct rockchip_rgb *rgb;
@@ -216,6 +220,23 @@ struct vop2 {
struct vop2_win win[];
};
+#define vop2_output_if_is_hdmi(x) ((x) == ROCKCHIP_VOP2_EP_HDMI0 || \
+ (x) == ROCKCHIP_VOP2_EP_HDMI1)
+
+#define vop2_output_if_is_dp(x) ((x) == ROCKCHIP_VOP2_EP_DP0 || \
+ (x) == ROCKCHIP_VOP2_EP_DP1)
+
+#define vop2_output_if_is_edp(x) ((x) == ROCKCHIP_VOP2_EP_EDP0 || \
+ (x) == ROCKCHIP_VOP2_EP_EDP1)
+
+#define vop2_output_if_is_mipi(x) ((x) == ROCKCHIP_VOP2_EP_MIPI0 || \
+ (x) == ROCKCHIP_VOP2_EP_MIPI1)
+
+#define vop2_output_if_is_lvds(x) ((x) == ROCKCHIP_VOP2_EP_LVDS0 || \
+ (x) == ROCKCHIP_VOP2_EP_LVDS1)
+
+#define vop2_output_if_is_dpi(x) ((x) == ROCKCHIP_VOP2_EP_RGB0)
+
static const struct regmap_config vop2_regmap_config;
static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc)
@@ -474,6 +495,17 @@ static bool vop2_output_uv_swap(u32 bus_
return false;
}
+static bool vop2_output_rg_swap(struct vop2 *vop2, u32 bus_format)
+{
+ if (vop2->data->soc_id == 3588) {
+ if (bus_format == MEDIA_BUS_FMT_YUV8_1X24 ||
+ bus_format == MEDIA_BUS_FMT_YUV10_1X30)
+ return true;
+ }
+
+ return false;
+}
+
static bool is_yuv_output(u32 bus_format)
{
switch (bus_format) {
@@ -890,13 +922,32 @@ static int vop2_core_clks_prepare_enable
goto err;
}
+ ret = clk_prepare_enable(vop2->pclk);
+ if (ret < 0) {
+ drm_err(vop2->drm, "failed to enable pclk - %d\n", ret);
+ goto err1;
+ }
+
return 0;
+err1:
+ clk_disable_unprepare(vop2->aclk);
err:
clk_disable_unprepare(vop2->hclk);
return ret;
}
+static void rk3588_vop2_power_domain_enable_all(struct vop2 *vop2)
+{
+ u32 pd;
+
+ pd = vop2_readl(vop2, RK3588_SYS_PD_CTRL);
+ pd &= ~(VOP2_PD_CLUSTER0 | VOP2_PD_CLUSTER1 | VOP2_PD_CLUSTER2 |
+ VOP2_PD_CLUSTER3 | VOP2_PD_ESMART);
+
+ vop2_writel(vop2, RK3588_SYS_PD_CTRL, pd);
+}
+
static void vop2_enable(struct vop2 *vop2)
{
int ret;
@@ -928,6 +979,9 @@ static void vop2_enable(struct vop2 *vop
if (vop2->data->soc_id == 3566)
vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
+ if (vop2->data->soc_id == 3588)
+ rk3588_vop2_power_domain_enable_all(vop2);
+
vop2_writel(vop2, RK3568_REG_CFG_DONE, RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN);
/*
@@ -953,6 +1007,7 @@ static void vop2_disable(struct vop2 *vo
pm_runtime_put_sync(vop2->dev);
+ clk_disable_unprepare(vop2->pclk);
clk_disable_unprepare(vop2->aclk);
clk_disable_unprepare(vop2->hclk);
}
@@ -1320,7 +1375,19 @@ static void vop2_plane_atomic_update(str
vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 1);
vop2_win_write(win, VOP2_WIN_AFBC_FORMAT, afbc_format);
vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap);
- vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0);
+ /*
+ * On rk3566/8, this bit is an auto-gating enable, but the
+ * function does not work well, so we need to disable it
+ * on these two platforms.
+ * On rk3588 and the following new SoCs (rk3528/rk3576),
+ * this bit is a gating disable, so we should write 1 to
+ * disable gating when AFBC is enabled.
+ */
+ if (vop2->data->soc_id == 3566 || vop2->data->soc_id == 3568)
+ vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0);
+ else
+ vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 1);
+
vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0);
transform_offset = vop2_afbc_transform_offset(pstate, half_block_en);
vop2_win_write(win, VOP2_WIN_AFBC_HDR_PTR, yrgb_mst);
@@ -1518,10 +1585,10 @@ static void vop2_post_config(struct drm_
vop2_vp_write(vp, RK3568_VP_DSP_BG, 0);
}
-static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id,
- u32 polflags)
+static unsigned long rk3568_set_intf_mux(struct vop2_video_port *vp, int id, u32 polflags)
{
struct vop2 *vop2 = vp->vop2;
+ struct drm_crtc *crtc = &vp->crtc;
u32 die, dip;
die = vop2_readl(vop2, RK3568_DSP_IF_EN);
@@ -1583,13 +1650,281 @@ static void rk3568_set_intf_mux(struct v
break;
default:
drm_err(vop2->drm, "Invalid interface id %d on vp%d\n", id, vp->id);
- return;
+ return 0;
}
dip |= RK3568_DSP_IF_POL__CFG_DONE_IMD;
vop2_writel(vop2, RK3568_DSP_IF_EN, die);
vop2_writel(vop2, RK3568_DSP_IF_POL, dip);
+
+ return crtc->state->adjusted_mode.crtc_clock * 1000LL;
+}
+
+/*
+ * calc the dclk on rk3588
+ * the available div of dclk is 1, 2, 4
+ */
+static unsigned long rk3588_calc_dclk(unsigned long child_clk, unsigned long max_dclk)
+{
+ if (child_clk * 4 <= max_dclk)
+ return child_clk * 4;
+ else if (child_clk * 2 <= max_dclk)
+ return child_clk * 2;
+ else if (child_clk <= max_dclk)
+ return child_clk;
+ else
+ return 0;
+}
+
+/*
+ * 4 pixclk/cycle on rk3588
+ * RGB/eDP/HDMI: if_pixclk >= dclk_core
+ * DP: dp_pixclk = dclk_out <= dclk_core
+ * DSI: mipi_pixclk <= dclk_out <= dclk_core
+ */
+static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id,
+ int *dclk_core_div, int *dclk_out_div,
+ int *if_pixclk_div, int *if_dclk_div)
+{
+ struct vop2 *vop2 = vp->vop2;
+ struct drm_crtc *crtc = &vp->crtc;
+ struct drm_display_mode *adjusted_mode = &crtc->state->adjusted_mode;
+ struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(crtc->state);
+ int output_mode = vcstate->output_mode;
+ unsigned long v_pixclk = adjusted_mode->crtc_clock * 1000LL; /* video timing pixclk */
+ unsigned long dclk_core_rate = v_pixclk >> 2;
+ unsigned long dclk_rate = v_pixclk;
+ unsigned long dclk_out_rate;
+ unsigned long if_dclk_rate;
+ unsigned long if_pixclk_rate;
+ int K = 1;
+
+ if (vop2_output_if_is_hdmi(id)) {
+ /*
+ * K = 2: dclk_core = if_pixclk_rate > if_dclk_rate
+ * K = 1: dclk_core = hdmie_edp_dclk > if_pixclk_rate
+ */
+ if (output_mode == ROCKCHIP_OUT_MODE_YUV420) {
+ dclk_rate = dclk_rate >> 1;
+ K = 2;
+ }
+
+ if_pixclk_rate = (dclk_core_rate << 1) / K;
+ if_dclk_rate = dclk_core_rate / K;
+ /*
+ * *if_pixclk_div = dclk_rate / if_pixclk_rate;
+ * *if_dclk_div = dclk_rate / if_dclk_rate;
+ */
+ *if_pixclk_div = 2;
+ *if_dclk_div = 4;
+ } else if (vop2_output_if_is_edp(id)) {
+ /*
+ * edp_pixclk = edp_dclk > dclk_core
+ */
+ if_pixclk_rate = v_pixclk / K;
+ dclk_rate = if_pixclk_rate * K;
+ /*
+ * *if_pixclk_div = dclk_rate / if_pixclk_rate;
+ * *if_dclk_div = *if_pixclk_div;
+ */
+ *if_pixclk_div = K;
+ *if_dclk_div = K;
+ } else if (vop2_output_if_is_dp(id)) {
+ if (output_mode == ROCKCHIP_OUT_MODE_YUV420)
+ dclk_out_rate = v_pixclk >> 3;
+ else
+ dclk_out_rate = v_pixclk >> 2;
+
+ dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000);
+ if (!dclk_rate) {
+ drm_err(vop2->drm, "DP dclk_out_rate out of range, dclk_out_rate: %ld KHZ\n",
+ dclk_out_rate);
+ return 0;
+ }
+ *dclk_out_div = dclk_rate / dclk_out_rate;
+ } else if (vop2_output_if_is_mipi(id)) {
+ if_pixclk_rate = dclk_core_rate / K;
+ /*
+ * dclk_core = dclk_out * K = if_pixclk * K = v_pixclk / 4
+ */
+ dclk_out_rate = if_pixclk_rate;
+ /*
+ * dclk_rate = N * dclk_core_rate, N = 1, 2 or 4;
+ * pick a small factor here
+ */
+ dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000);
+ if (!dclk_rate) {
+ drm_err(vop2->drm, "MIPI dclk out of range, dclk_out_rate: %ld KHZ\n",
+ dclk_out_rate);
+ return 0;
+ }
+ *dclk_out_div = dclk_rate / dclk_out_rate;
+ /*
+ * mipi pixclk == dclk_out
+ */
+ *if_pixclk_div = 1;
+ } else if (vop2_output_if_is_dpi(id)) {
+ dclk_rate = v_pixclk;
+ }
+
+ *dclk_core_div = dclk_rate / dclk_core_rate;
+ *if_pixclk_div = ilog2(*if_pixclk_div);
+ *if_dclk_div = ilog2(*if_dclk_div);
+ *dclk_core_div = ilog2(*dclk_core_div);
+ *dclk_out_div = ilog2(*dclk_out_div);
+
+ drm_dbg(vop2->drm, "dclk: %ld, pixclk_div: %d, dclk_div: %d\n",
+ dclk_rate, *if_pixclk_div, *if_dclk_div);
+
+ return dclk_rate;
+}
+
+/*
+ * MIPI port mux on rk3588:
+ * 0: Video Port 2
+ * 1: Video Port 3
+ * 3: Video Port 1 (MIPI1 only)
+ */
+static u32 rk3588_get_mipi_port_mux(int vp_id)
+{
+ if (vp_id == 1)
+ return 3;
+ else if (vp_id == 3)
+ return 1;
+ else
+ return 0;
+}
+
+static u32 rk3588_get_hdmi_pol(u32 flags)
+{
+ u32 val;
+
+ val = (flags & DRM_MODE_FLAG_NHSYNC) ? BIT(HSYNC_POSITIVE) : 0;
+ val |= (flags & DRM_MODE_FLAG_NVSYNC) ? BIT(VSYNC_POSITIVE) : 0;
+
+ return val;
+}
+
+static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 polflags)
+{
+ struct vop2 *vop2 = vp->vop2;
+ int dclk_core_div, dclk_out_div, if_pixclk_div, if_dclk_div;
+ unsigned long clock;
+ u32 die, dip, div, vp_clk_div, val;
+
+ clock = rk3588_calc_cru_cfg(vp, id, &dclk_core_div, &dclk_out_div,
+ &if_pixclk_div, &if_dclk_div);
+ if (!clock)
+ return 0;
+
+ vp_clk_div = FIELD_PREP(RK3588_VP_CLK_CTRL__DCLK_CORE_DIV, dclk_core_div);
+ vp_clk_div |= FIELD_PREP(RK3588_VP_CLK_CTRL__DCLK_OUT_DIV, dclk_out_div);
+
+ die = vop2_readl(vop2, RK3568_DSP_IF_EN);
+ dip = vop2_readl(vop2, RK3568_DSP_IF_POL);
+ div = vop2_readl(vop2, RK3568_DSP_IF_CTRL);
+
+ switch (id) {
+ case ROCKCHIP_VOP2_EP_HDMI0:
+ div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV;
+ div &= ~RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV;
+ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div);
+ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div);
+ die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX;
+ die |= RK3588_SYS_DSP_INFACE_EN_HDMI0 |
+ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id);
+ val = rk3588_get_hdmi_pol(polflags);
+ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 1, 1));
+ regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 6, 5));
+ break;
+ case ROCKCHIP_VOP2_EP_HDMI1:
+ div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV;
+ div &= ~RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV;
+ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV, if_dclk_div);
+ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV, if_pixclk_div);
+ die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX;
+ die |= RK3588_SYS_DSP_INFACE_EN_HDMI1 |
+ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id);
+ val = rk3588_get_hdmi_pol(polflags);
+ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 4, 4));
+ regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 8, 7));
+ break;
+ case ROCKCHIP_VOP2_EP_EDP0:
+ div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV;
+ div &= ~RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV;
+ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div);
+ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div);
+ die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX;
+ die |= RK3588_SYS_DSP_INFACE_EN_EDP0 |
+ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id);
+ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 0, 0));
+ break;
+ case ROCKCHIP_VOP2_EP_EDP1:
+ div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV;
+ div &= ~RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV;
+ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div);
+ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div);
+ die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX;
+ die |= RK3588_SYS_DSP_INFACE_EN_EDP1 |
+ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id);
+ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 3, 3));
+ break;
+ case ROCKCHIP_VOP2_EP_MIPI0:
+ div &= ~RK3588_DSP_IF_MIPI0_PCLK_DIV;
+ div |= FIELD_PREP(RK3588_DSP_IF_MIPI0_PCLK_DIV, if_pixclk_div);
+ die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX;
+ val = rk3588_get_mipi_port_mux(vp->id);
+ die |= RK3588_SYS_DSP_INFACE_EN_MIPI0 |
+ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX, !!val);
+ break;
+ case ROCKCHIP_VOP2_EP_MIPI1:
+ div &= ~RK3588_DSP_IF_MIPI1_PCLK_DIV;
+ div |= FIELD_PREP(RK3588_DSP_IF_MIPI1_PCLK_DIV, if_pixclk_div);
+ die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX;
+ val = rk3588_get_mipi_port_mux(vp->id);
+ die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 |
+ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, val);
+ break;
+ case ROCKCHIP_VOP2_EP_DP0:
+ die &= ~RK3588_SYS_DSP_INFACE_EN_DP0_MUX;
+ die |= RK3588_SYS_DSP_INFACE_EN_DP0 |
+ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_DP0_MUX, vp->id);
+ dip &= ~RK3588_DSP_IF_POL__DP0_PIN_POL;
+ dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP0_PIN_POL, polflags);
+ break;
+ case ROCKCHIP_VOP2_EP_DP1:
+ die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX;
+ die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 |
+ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, vp->id);
+ dip &= ~RK3588_DSP_IF_POL__DP1_PIN_POL;
+ dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP1_PIN_POL, polflags);
+ break;
+ default:
+ drm_err(vop2->drm, "Invalid interface id %d on vp%d\n", id, vp->id);
+ return 0;
+ }
+
+ dip |= RK3568_DSP_IF_POL__CFG_DONE_IMD;
+
+ vop2_vp_write(vp, RK3588_VP_CLK_CTRL, vp_clk_div);
+ vop2_writel(vop2, RK3568_DSP_IF_EN, die);
+ vop2_writel(vop2, RK3568_DSP_IF_CTRL, div);
+ vop2_writel(vop2, RK3568_DSP_IF_POL, dip);
+
+ return clock;
+}
+
+static unsigned long vop2_set_intf_mux(struct vop2_video_port *vp, int ep_id, u32 polflags)
+{
+ struct vop2 *vop2 = vp->vop2;
+
+ if (vop2->data->soc_id == 3566 || vop2->data->soc_id == 3568)
+ return rk3568_set_intf_mux(vp, ep_id, polflags);
+ else if (vop2->data->soc_id == 3588)
+ return rk3588_set_intf_mux(vp, ep_id, polflags);
+ else
+ return 0;
}
static int us_to_vertical_line(struct drm_display_mode *mode, int us)
@@ -1659,9 +1994,17 @@ static void vop2_crtc_atomic_enable(stru
drm_for_each_encoder_mask(encoder, crtc->dev, crtc_state->encoder_mask) {
struct rockchip_encoder *rkencoder = to_rockchip_encoder(encoder);
- rk3568_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags);
+ /*
+ * To drive a high resolution (4K@120, 8K), the VOP on rk3588/rk3576
+ * needs to process multiple (1/2/4/8) pixels per cycle, so the dclk
+ * fed by the system CRU may be 1/2 or 1/4 of mode->clock.
+ */
+ clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags);
}
+ if (!clock)
+ return;
+
if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
!(vp_data->feature & VOP_FEATURE_OUTPUT_10BIT))
out_mode = ROCKCHIP_OUT_MODE_P888;
@@ -1672,6 +2015,8 @@ static void vop2_crtc_atomic_enable(stru
if (vop2_output_uv_swap(vcstate->bus_format, vcstate->output_mode))
dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RB_SWAP;
+ if (vop2_output_rg_swap(vop2, vcstate->bus_format))
+ dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RG_SWAP;
if (vcstate->yuv_overlay)
dsp_ctrl |= RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y;
@@ -2079,6 +2424,14 @@ static void vop2_setup_layer_mixer(struc
port_sel &= ~RK3568_OVL_PORT_SEL__CLUSTER1;
port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__CLUSTER1, vp->id);
break;
+ case ROCKCHIP_VOP2_CLUSTER2:
+ port_sel &= ~RK3588_OVL_PORT_SEL__CLUSTER2;
+ port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__CLUSTER2, vp->id);
+ break;
+ case ROCKCHIP_VOP2_CLUSTER3:
+ port_sel &= ~RK3588_OVL_PORT_SEL__CLUSTER3;
+ port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__CLUSTER3, vp->id);
+ break;
case ROCKCHIP_VOP2_ESMART0:
port_sel &= ~RK3568_OVL_PORT_SEL__ESMART0;
port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__ESMART0, vp->id);
@@ -2087,6 +2440,14 @@ static void vop2_setup_layer_mixer(struc
port_sel &= ~RK3568_OVL_PORT_SEL__ESMART1;
port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__ESMART1, vp->id);
break;
+ case ROCKCHIP_VOP2_ESMART2:
+ port_sel &= ~RK3588_OVL_PORT_SEL__ESMART2;
+ port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__ESMART2, vp->id);
+ break;
+ case ROCKCHIP_VOP2_ESMART3:
+ port_sel &= ~RK3588_OVL_PORT_SEL__ESMART3;
+ port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__ESMART3, vp->id);
+ break;
case ROCKCHIP_VOP2_SMART0:
port_sel &= ~RK3568_OVL_PORT_SEL__SMART0;
port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__SMART0, vp->id);
@@ -2820,8 +3181,29 @@ static int vop2_bind(struct device *dev,
if (IS_ERR(vop2->lut_regs))
return PTR_ERR(vop2->lut_regs);
}
+ if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_GRF) {
+ vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf");
+ if (IS_ERR(vop2->sys_grf))
+ return dev_err_probe(dev, PTR_ERR(vop2->sys_grf), "cannot get sys_grf");
+ }
+
+ if (vop2_data->feature & VOP2_FEATURE_HAS_VOP_GRF) {
+ vop2->vop_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,vop-grf");
+ if (IS_ERR(vop2->vop_grf))
+ return dev_err_probe(dev, PTR_ERR(vop2->vop_grf), "cannot get vop_grf");
+ }
+
+ if (vop2_data->feature & VOP2_FEATURE_HAS_VO1_GRF) {
+ vop2->vo1_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,vo1-grf");
+ if (IS_ERR(vop2->vo1_grf))
+ return dev_err_probe(dev, PTR_ERR(vop2->vo1_grf), "cannot get vo1_grf");
+ }
- vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf");
+ if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_PMU) {
+ vop2->sys_pmu = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,pmu");
+ if (IS_ERR(vop2->sys_pmu))
+ return dev_err_probe(dev, PTR_ERR(vop2->sys_pmu), "cannot get sys_pmu");
+ }
vop2->hclk = devm_clk_get(vop2->dev, "hclk");
if (IS_ERR(vop2->hclk)) {
@@ -2835,6 +3217,12 @@ static int vop2_bind(struct device *dev,
return PTR_ERR(vop2->aclk);
}
+ vop2->pclk = devm_clk_get_optional(vop2->dev, "pclk_vop");
+ if (IS_ERR(vop2->pclk)) {
+ drm_err(vop2->drm, "failed to get pclk source\n");
+ return PTR_ERR(vop2->pclk);
+ }
+
vop2->irq = platform_get_irq(pdev, 0);
if (vop2->irq < 0) {
drm_err(vop2->drm, "cannot find irq for vop2\n");
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
@@ -14,9 +14,16 @@
#define VOP_FEATURE_OUTPUT_10BIT BIT(0)
+#define VOP2_FEATURE_HAS_SYS_GRF BIT(0)
+#define VOP2_FEATURE_HAS_VO0_GRF BIT(1)
+#define VOP2_FEATURE_HAS_VO1_GRF BIT(2)
+#define VOP2_FEATURE_HAS_VOP_GRF BIT(3)
+#define VOP2_FEATURE_HAS_SYS_PMU BIT(4)
+
#define WIN_FEATURE_AFBDC BIT(0)
#define WIN_FEATURE_CLUSTER BIT(1)
+#define HIWORD_UPDATE(v, h, l) ((GENMASK(h, l) << 16) | ((v) << (l)))
/*
* the delay number of a window in different mode.
*/
@@ -39,6 +46,18 @@ enum vop2_scale_down_mode {
VOP2_SCALE_DOWN_AVG,
};
+/*
+ * vop2 internal power domain ids;
+ * all must be non-zero, as 0 is treated as invalid.
+ */
+#define VOP2_PD_CLUSTER0 BIT(0)
+#define VOP2_PD_CLUSTER1 BIT(1)
+#define VOP2_PD_CLUSTER2 BIT(2)
+#define VOP2_PD_CLUSTER3 BIT(3)
+#define VOP2_PD_DSC_8K BIT(5)
+#define VOP2_PD_DSC_4K BIT(6)
+#define VOP2_PD_ESMART BIT(7)
+
enum vop2_win_regs {
VOP2_WIN_ENABLE,
VOP2_WIN_FORMAT,
@@ -139,6 +158,7 @@ struct vop2_video_port_data {
struct vop2_data {
u8 nr_vps;
+ u64 feature;
const struct vop2_win_data *win;
const struct vop2_video_port_data *vp;
struct vop_rect max_input;
@@ -193,6 +213,11 @@ enum dst_factor_mode {
};
#define RK3568_GRF_VO_CON1 0x0364
+
+#define RK3588_GRF_SOC_CON1 0x0304
+#define RK3588_GRF_VOP_CON2 0x08
+#define RK3588_GRF_VO1_CON0 0x00
+
/* System registers definition */
#define RK3568_REG_CFG_DONE 0x000
#define RK3568_VERSION_INFO 0x004
@@ -201,6 +226,7 @@ enum dst_factor_mode {
#define RK3568_DSP_IF_EN 0x028
#define RK3568_DSP_IF_CTRL 0x02c
#define RK3568_DSP_IF_POL 0x030
+#define RK3588_SYS_PD_CTRL 0x034
#define RK3568_WB_CTRL 0x40
#define RK3568_WB_XSCAL_FACTOR 0x44
#define RK3568_WB_YRGB_MST 0x48
@@ -221,9 +247,14 @@ enum dst_factor_mode {
#define RK3568_VP_INT_RAW_STATUS(vp) (0xAC + (vp) * 0x10)
/* Video Port registers definition */
+#define RK3568_VP0_CTRL_BASE 0x0C00
+#define RK3568_VP1_CTRL_BASE 0x0D00
+#define RK3568_VP2_CTRL_BASE 0x0E00
+#define RK3588_VP3_CTRL_BASE 0x0F00
#define RK3568_VP_DSP_CTRL 0x00
#define RK3568_VP_MIPI_CTRL 0x04
#define RK3568_VP_COLOR_BAR_CTRL 0x08
+#define RK3588_VP_CLK_CTRL 0x0C
#define RK3568_VP_3D_LUT_CTRL 0x10
#define RK3568_VP_3D_LUT_MST 0x20
#define RK3568_VP_DSP_BG 0x2C
@@ -265,6 +296,17 @@ enum dst_factor_mode {
#define RK3568_SMART_DLY_NUM 0x6F8
/* Cluster register definition, offset relative to window base */
+#define RK3568_CLUSTER0_CTRL_BASE 0x1000
+#define RK3568_CLUSTER1_CTRL_BASE 0x1200
+#define RK3588_CLUSTER2_CTRL_BASE 0x1400
+#define RK3588_CLUSTER3_CTRL_BASE 0x1600
+#define RK3568_ESMART0_CTRL_BASE 0x1800
+#define RK3568_ESMART1_CTRL_BASE 0x1A00
+#define RK3568_SMART0_CTRL_BASE 0x1C00
+#define RK3568_SMART1_CTRL_BASE 0x1E00
+#define RK3588_ESMART2_CTRL_BASE 0x1C00
+#define RK3588_ESMART3_CTRL_BASE 0x1E00
+
#define RK3568_CLUSTER_WIN_CTRL0 0x00
#define RK3568_CLUSTER_WIN_CTRL1 0x04
#define RK3568_CLUSTER_WIN_YRGB_MST 0x10
@@ -358,13 +400,18 @@ enum dst_factor_mode {
#define RK3568_VP_DSP_CTRL__DITHER_DOWN_EN BIT(17)
#define RK3568_VP_DSP_CTRL__PRE_DITHER_DOWN_EN BIT(16)
#define RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y BIT(15)
+#define RK3568_VP_DSP_CTRL__DSP_RG_SWAP BIT(10)
#define RK3568_VP_DSP_CTRL__DSP_RB_SWAP BIT(9)
+#define RK3568_VP_DSP_CTRL__DSP_BG_SWAP BIT(8)
#define RK3568_VP_DSP_CTRL__DSP_INTERLACE BIT(7)
#define RK3568_VP_DSP_CTRL__DSP_FILED_POL BIT(6)
#define RK3568_VP_DSP_CTRL__P2I_EN BIT(5)
#define RK3568_VP_DSP_CTRL__CORE_DCLK_DIV BIT(4)
#define RK3568_VP_DSP_CTRL__OUT_MODE GENMASK(3, 0)
+#define RK3588_VP_CLK_CTRL__DCLK_OUT_DIV GENMASK(3, 2)
+#define RK3588_VP_CLK_CTRL__DCLK_CORE_DIV GENMASK(1, 0)
+
#define RK3568_VP_POST_SCL_CTRL__VSCALEDOWN BIT(1)
#define RK3568_VP_POST_SCL_CTRL__HSCALEDOWN BIT(0)
@@ -383,11 +430,37 @@ enum dst_factor_mode {
#define RK3568_SYS_DSP_INFACE_EN_HDMI BIT(1)
#define RK3568_SYS_DSP_INFACE_EN_RGB BIT(0)
+#define RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX GENMASK(22, 21)
+#define RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX GENMASK(20, 20)
+#define RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX GENMASK(19, 18)
+#define RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX GENMASK(17, 16)
+#define RK3588_SYS_DSP_INFACE_EN_DP1_MUX GENMASK(15, 14)
+#define RK3588_SYS_DSP_INFACE_EN_DP0_MUX GENMASK(13, 12)
+#define RK3588_SYS_DSP_INFACE_EN_DPI GENMASK(9, 8)
+#define RK3588_SYS_DSP_INFACE_EN_MIPI1 BIT(7)
+#define RK3588_SYS_DSP_INFACE_EN_MIPI0 BIT(6)
+#define RK3588_SYS_DSP_INFACE_EN_HDMI1 BIT(5)
+#define RK3588_SYS_DSP_INFACE_EN_EDP1 BIT(4)
+#define RK3588_SYS_DSP_INFACE_EN_HDMI0 BIT(3)
+#define RK3588_SYS_DSP_INFACE_EN_EDP0 BIT(2)
+#define RK3588_SYS_DSP_INFACE_EN_DP1 BIT(1)
+#define RK3588_SYS_DSP_INFACE_EN_DP0 BIT(0)
+
+#define RK3588_DSP_IF_MIPI1_PCLK_DIV GENMASK(27, 26)
+#define RK3588_DSP_IF_MIPI0_PCLK_DIV GENMASK(25, 24)
+#define RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV GENMASK(22, 22)
+#define RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV GENMASK(21, 20)
+#define RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV GENMASK(18, 18)
+#define RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV GENMASK(17, 16)
+
#define RK3568_DSP_IF_POL__MIPI_PIN_POL GENMASK(19, 16)
#define RK3568_DSP_IF_POL__EDP_PIN_POL GENMASK(15, 12)
#define RK3568_DSP_IF_POL__HDMI_PIN_POL GENMASK(7, 4)
#define RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL GENMASK(3, 0)
+#define RK3588_DSP_IF_POL__DP1_PIN_POL GENMASK(14, 12)
+#define RK3588_DSP_IF_POL__DP0_PIN_POL GENMASK(10, 8)
+
#define RK3568_VP0_MIPI_CTRL__DCLK_DIV2_PHASE_LOCK BIT(5)
#define RK3568_VP0_MIPI_CTRL__DCLK_DIV2 BIT(4)
@@ -409,8 +482,12 @@ enum dst_factor_mode {
#define RK3568_OVL_PORT_SEL__SEL_PORT GENMASK(31, 16)
#define RK3568_OVL_PORT_SEL__SMART1 GENMASK(31, 30)
#define RK3568_OVL_PORT_SEL__SMART0 GENMASK(29, 28)
+#define RK3588_OVL_PORT_SEL__ESMART3 GENMASK(31, 30)
+#define RK3588_OVL_PORT_SEL__ESMART2 GENMASK(29, 28)
#define RK3568_OVL_PORT_SEL__ESMART1 GENMASK(27, 26)
#define RK3568_OVL_PORT_SEL__ESMART0 GENMASK(25, 24)
+#define RK3588_OVL_PORT_SEL__CLUSTER3 GENMASK(23, 22)
+#define RK3588_OVL_PORT_SEL__CLUSTER2 GENMASK(21, 20)
#define RK3568_OVL_PORT_SEL__CLUSTER1 GENMASK(19, 18)
#define RK3568_OVL_PORT_SEL__CLUSTER0 GENMASK(17, 16)
#define RK3568_OVL_PORT_SET__PORT2_MUX GENMASK(11, 8)
@@ -423,6 +500,10 @@ enum dst_factor_mode {
#define RK3568_CLUSTER_DLY_NUM__CLUSTER0_1 GENMASK(15, 8)
#define RK3568_CLUSTER_DLY_NUM__CLUSTER0_0 GENMASK(7, 0)
+#define RK3568_CLUSTER_WIN_CTRL0__WIN0_EN BIT(0)
+
+#define RK3568_SMART_REGION0_CTRL__WIN0_EN BIT(0)
+
#define RK3568_SMART_DLY_NUM__SMART1 GENMASK(31, 24)
#define RK3568_SMART_DLY_NUM__SMART0 GENMASK(23, 16)
#define RK3568_SMART_DLY_NUM__ESMART1 GENMASK(15, 8)
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -34,6 +34,30 @@ static const uint32_t formats_cluster[]
DRM_FORMAT_Y210, /* yuv422_10bit non-Linear mode only */
};
+static const uint32_t formats_esmart[] = {
+ DRM_FORMAT_XRGB8888,
+ DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_ABGR8888,
+ DRM_FORMAT_RGB888,
+ DRM_FORMAT_BGR888,
+ DRM_FORMAT_RGB565,
+ DRM_FORMAT_BGR565,
+ DRM_FORMAT_NV12, /* yuv420_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV21, /* yvu420_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV61, /* yvu422_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV20, /* yuv422_10bit linear mode, 2 plane, no padding */
+ DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV42, /* yvu444_8bit linear mode, 2 plane */
+ DRM_FORMAT_NV30, /* yuv444_10bit linear mode, 2 plane, no padding */
+ DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */
+ DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */
+ DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */
+ DRM_FORMAT_YUYV, /* yuv422_8bit[YUYV] linear mode */
+ DRM_FORMAT_UYVY, /* yuv422_8bit[UYVY] linear mode */
+};
+
static const uint32_t formats_rk356x_esmart[] = {
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
@@ -236,7 +260,188 @@ static const struct vop2_win_data rk3568
},
};
+static const struct vop2_video_port_data rk3588_vop_video_ports[] = {
+ {
+ .id = 0,
+ .feature = VOP_FEATURE_OUTPUT_10BIT,
+ .gamma_lut_len = 1024,
+ .cubic_lut_len = 9 * 9 * 9, /* 9x9x9 */
+ .max_output = { 4096, 2304 },
+ /* hdr2sdr sdr2hdr hdr2hdr sdr2sdr */
+ .pre_scan_max_dly = { 76, 65, 65, 54 },
+ .offset = 0xc00,
+ }, {
+ .id = 1,
+ .feature = VOP_FEATURE_OUTPUT_10BIT,
+ .gamma_lut_len = 1024,
+ .cubic_lut_len = 729, /* 9x9x9 */
+ .max_output = { 4096, 2304 },
+ .pre_scan_max_dly = { 76, 65, 65, 54 },
+ .offset = 0xd00,
+ }, {
+ .id = 2,
+ .feature = VOP_FEATURE_OUTPUT_10BIT,
+ .gamma_lut_len = 1024,
+ .cubic_lut_len = 17 * 17 * 17, /* 17x17x17 */
+ .max_output = { 4096, 2304 },
+ .pre_scan_max_dly = { 52, 52, 52, 52 },
+ .offset = 0xe00,
+ }, {
+ .id = 3,
+ .gamma_lut_len = 1024,
+ .max_output = { 2048, 1536 },
+ .pre_scan_max_dly = { 52, 52, 52, 52 },
+ .offset = 0xf00,
+ },
+};
+
+/*
+ * rk3588 vop with 4 cluster windows and 4 esmart windows.
+ * Every cluster can work as a 4K window or be split into two windows.
+ * All windows in a cluster support AFBCD.
+ *
+ * Every esmart and smart window supports 4 multi-regions.
+ *
+ * Scale filter mode:
+ *
+ * * Cluster: bicubic for horizontal scale up, others use bilinear
+ * * ESmart:
+ * * nearest-neighbor/bilinear/bicubic for scale up
+ * * nearest-neighbor/bilinear/average for scale down
+ *
+ * AXI Read ID assignment:
+ * Two AXI buses:
+ * AXI0 is a read/write bus with higher performance.
+ * AXI1 is a read-only bus.
+ *
+ * Every window on an AXI bus must be assigned two unique
+ * read ids (yrgb_id/uv_id; valid ids are 0x1~0xe).
+ *
+ * AXI0:
+ * Cluster0/1, Esmart0/1, WriteBack
+ *
+ * AXI1:
+ * Cluster2/3, Esmart2/3
+ *
+ */
+static const struct vop2_win_data rk3588_vop_win_data[] = {
+ {
+ .name = "Cluster0-win0",
+ .phys_id = ROCKCHIP_VOP2_CLUSTER0,
+ .base = 0x1000,
+ .formats = formats_cluster,
+ .nformats = ARRAY_SIZE(formats_cluster),
+ .format_modifiers = format_modifiers_afbc,
+ .layer_sel_id = 0,
+ .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
+ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y,
+ .max_upscale_factor = 4,
+ .max_downscale_factor = 4,
+ .dly = { 4, 26, 29 },
+ .type = DRM_PLANE_TYPE_PRIMARY,
+ .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER,
+ }, {
+ .name = "Cluster1-win0",
+ .phys_id = ROCKCHIP_VOP2_CLUSTER1,
+ .base = 0x1200,
+ .formats = formats_cluster,
+ .nformats = ARRAY_SIZE(formats_cluster),
+ .format_modifiers = format_modifiers_afbc,
+ .layer_sel_id = 1,
+ .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
+ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y,
+ .type = DRM_PLANE_TYPE_PRIMARY,
+ .max_upscale_factor = 4,
+ .max_downscale_factor = 4,
+ .dly = { 4, 26, 29 },
+ .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER,
+ }, {
+ .name = "Cluster2-win0",
+ .phys_id = ROCKCHIP_VOP2_CLUSTER2,
+ .base = 0x1400,
+ .formats = formats_cluster,
+ .nformats = ARRAY_SIZE(formats_cluster),
+ .format_modifiers = format_modifiers_afbc,
+ .layer_sel_id = 4,
+ .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
+ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y,
+ .type = DRM_PLANE_TYPE_PRIMARY,
+ .max_upscale_factor = 4,
+ .max_downscale_factor = 4,
+ .dly = { 4, 26, 29 },
+ .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER,
+ }, {
+ .name = "Cluster3-win0",
+ .phys_id = ROCKCHIP_VOP2_CLUSTER3,
+ .base = 0x1600,
+ .formats = formats_cluster,
+ .nformats = ARRAY_SIZE(formats_cluster),
+ .format_modifiers = format_modifiers_afbc,
+ .layer_sel_id = 5,
+ .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
+ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y,
+ .type = DRM_PLANE_TYPE_PRIMARY,
+ .max_upscale_factor = 4,
+ .max_downscale_factor = 4,
+ .dly = { 4, 26, 29 },
+ .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER,
+ }, {
+ .name = "Esmart0-win0",
+ .phys_id = ROCKCHIP_VOP2_ESMART0,
+ .formats = formats_esmart,
+ .nformats = ARRAY_SIZE(formats_esmart),
+ .format_modifiers = format_modifiers,
+ .base = 0x1800,
+ .layer_sel_id = 2,
+ .supported_rotations = DRM_MODE_REFLECT_Y,
+ .type = DRM_PLANE_TYPE_OVERLAY,
+ .max_upscale_factor = 8,
+ .max_downscale_factor = 8,
+ .dly = { 23, 45, 48 },
+ }, {
+ .name = "Esmart1-win0",
+ .phys_id = ROCKCHIP_VOP2_ESMART1,
+ .formats = formats_esmart,
+ .nformats = ARRAY_SIZE(formats_esmart),
+ .format_modifiers = format_modifiers,
+ .base = 0x1a00,
+ .layer_sel_id = 3,
+ .supported_rotations = DRM_MODE_REFLECT_Y,
+ .type = DRM_PLANE_TYPE_OVERLAY,
+ .max_upscale_factor = 8,
+ .max_downscale_factor = 8,
+ .dly = { 23, 45, 48 },
+ }, {
+ .name = "Esmart2-win0",
+ .phys_id = ROCKCHIP_VOP2_ESMART2,
+ .base = 0x1c00,
+ .formats = formats_esmart,
+ .nformats = ARRAY_SIZE(formats_esmart),
+ .format_modifiers = format_modifiers,
+ .layer_sel_id = 6,
+ .supported_rotations = DRM_MODE_REFLECT_Y,
+ .type = DRM_PLANE_TYPE_OVERLAY,
+ .max_upscale_factor = 8,
+ .max_downscale_factor = 8,
+ .dly = { 23, 45, 48 },
+ }, {
+ .name = "Esmart3-win0",
+ .phys_id = ROCKCHIP_VOP2_ESMART3,
+ .formats = formats_esmart,
+ .nformats = ARRAY_SIZE(formats_esmart),
+ .format_modifiers = format_modifiers,
+ .base = 0x1e00,
+ .layer_sel_id = 7,
+ .supported_rotations = DRM_MODE_REFLECT_Y,
+ .type = DRM_PLANE_TYPE_OVERLAY,
+ .max_upscale_factor = 8,
+ .max_downscale_factor = 8,
+ .dly = { 23, 45, 48 },
+ },
+};
+
static const struct vop2_data rk3566_vop = {
+ .feature = VOP2_FEATURE_HAS_SYS_GRF,
.nr_vps = 3,
.max_input = { 4096, 2304 },
.max_output = { 4096, 2304 },
@@ -247,6 +452,7 @@ static const struct vop2_data rk3566_vop
};
static const struct vop2_data rk3568_vop = {
+ .feature = VOP2_FEATURE_HAS_SYS_GRF,
.nr_vps = 3,
.max_input = { 4096, 2304 },
.max_output = { 4096, 2304 },
@@ -256,6 +462,18 @@ static const struct vop2_data rk3568_vop
.soc_id = 3568,
};
+static const struct vop2_data rk3588_vop = {
+ .feature = VOP2_FEATURE_HAS_SYS_GRF | VOP2_FEATURE_HAS_VO1_GRF |
+ VOP2_FEATURE_HAS_VOP_GRF | VOP2_FEATURE_HAS_SYS_PMU,
+ .nr_vps = 4,
+ .max_input = { 4096, 4320 },
+ .max_output = { 4096, 4320 },
+ .vp = rk3588_vop_video_ports,
+ .win = rk3588_vop_win_data,
+ .win_size = ARRAY_SIZE(rk3588_vop_win_data),
+ .soc_id = 3588,
+};
+
static const struct of_device_id vop2_dt_match[] = {
{
.compatible = "rockchip,rk3566-vop",
@@ -264,6 +482,9 @@ static const struct of_device_id vop2_dt
.compatible = "rockchip,rk3568-vop",
.data = &rk3568_vop,
}, {
+ .compatible = "rockchip,rk3588-vop",
+ .data = &rk3588_vop
+ }, {
},
};
MODULE_DEVICE_TABLE(of, vop2_dt_match);
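
The divider logic in rk3588_calc_cru_cfg() is easier to follow with concrete numbers. A worked example for a 4K@60 RGB mode over HDMI (594 MHz mode clock, K = 1; an illustration under those assumptions, not driver code):

#include <stdio.h>

int main(void)
{
	unsigned long v_pixclk  = 594000000UL;    /* video timing pixclk */
	unsigned long dclk_core = v_pixclk >> 2;  /* 148.5 MHz: 4 px/cycle */
	unsigned long if_pixclk = dclk_core << 1; /* 297 MHz */
	unsigned long if_dclk   = dclk_core;      /* 148.5 MHz */

	/* dividers are taken relative to dclk_rate == v_pixclk, then log2-encoded */
	printf("if_pixclk_div = %lu -> log2 = 1\n", v_pixclk / if_pixclk); /* 2 */
	printf("if_dclk_div   = %lu -> log2 = 2\n", v_pixclk / if_dclk);   /* 4 */
	return 0;
}

These are exactly the static values (*if_pixclk_div = 2, *if_dclk_div = 4) that the HDMI branch writes before the ilog2() conversion.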


@@ -1,80 +0,0 @@
From 9d7fe7704d534c2d043aff2987f10671a8b4373d Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:59:31 +0800
Subject: [PATCH] drm/rockchip: vop2: rename VOP_FEATURE_OUTPUT_10BIT to
VOP2_VP_FEATURE_OUTPUT_10BIT
VOP2 has multiple independent video ports with different
features, so rename VOP_FEATURE_OUTPUT_10BIT to
VOP2_VP_FEATURE_OUTPUT_10BIT to make the meaning clearer.
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115931.1785495-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 2 +-
drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 2 +-
drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 8 ++++----
3 files changed, 6 insertions(+), 6 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -2006,7 +2006,7 @@ static void vop2_crtc_atomic_enable(stru
return;
if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
- !(vp_data->feature & VOP_FEATURE_OUTPUT_10BIT))
+ !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT))
out_mode = ROCKCHIP_OUT_MODE_P888;
else
out_mode = vcstate->output_mode;
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
@@ -12,7 +12,7 @@
#include "rockchip_drm_drv.h"
#include "rockchip_drm_vop.h"
-#define VOP_FEATURE_OUTPUT_10BIT BIT(0)
+#define VOP2_VP_FEATURE_OUTPUT_10BIT BIT(0)
#define VOP2_FEATURE_HAS_SYS_GRF BIT(0)
#define VOP2_FEATURE_HAS_VO0_GRF BIT(1)
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -136,7 +136,7 @@ static const uint64_t format_modifiers_a
static const struct vop2_video_port_data rk3568_vop_video_ports[] = {
{
.id = 0,
- .feature = VOP_FEATURE_OUTPUT_10BIT,
+ .feature = VOP2_VP_FEATURE_OUTPUT_10BIT,
.gamma_lut_len = 1024,
.cubic_lut_len = 9 * 9 * 9,
.max_output = { 4096, 2304 },
@@ -263,7 +263,7 @@ static const struct vop2_win_data rk3568
static const struct vop2_video_port_data rk3588_vop_video_ports[] = {
{
.id = 0,
- .feature = VOP_FEATURE_OUTPUT_10BIT,
+ .feature = VOP2_VP_FEATURE_OUTPUT_10BIT,
.gamma_lut_len = 1024,
.cubic_lut_len = 9 * 9 * 9, /* 9x9x9 */
.max_output = { 4096, 2304 },
@@ -272,7 +272,7 @@ static const struct vop2_video_port_data
.offset = 0xc00,
}, {
.id = 1,
- .feature = VOP_FEATURE_OUTPUT_10BIT,
+ .feature = VOP2_VP_FEATURE_OUTPUT_10BIT,
.gamma_lut_len = 1024,
.cubic_lut_len = 729, /* 9x9x9 */
.max_output = { 4096, 2304 },
@@ -280,7 +280,7 @@ static const struct vop2_video_port_data
.offset = 0xd00,
}, {
.id = 2,
- .feature = VOP_FEATURE_OUTPUT_10BIT,
+ .feature = VOP2_VP_FEATURE_OUTPUT_10BIT,
.gamma_lut_len = 1024,
.cubic_lut_len = 17 * 17 * 17, /* 17x17x17 */
.max_output = { 4096, 2304 },


@@ -1,59 +0,0 @@
From 3ee348eb36f14e9303a7e9757efb91b0bbf3f7a9 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Sun, 17 Dec 2023 16:44:15 +0800
Subject: [PATCH] drm/rockchip: vop2: Avoid use regmap_reinit_cache at runtime
Marek reported a possible irq lock inversion dependency warning when
commit 81a06f1d02e5 ("Revert "drm/rockchip: vop2: Use regcache_sync()
to fix suspend/resume"") landed in linux-next.
I can reproduce this warning with:
CONFIG_PROVE_LOCKING=y
CONFIG_DEBUG_LOCKDEP=y
It seems that using regmap_reinit_cache at runtime together with Mark's
commit 3d59c22bbb8d ("drm/rockchip: vop2: Convert to use maple tree
register cache") triggers a possible irq lock inversion dependency
warning.
One solution is to switch back to REGCACHE_RBTREE, but REGCACHE_MAPLE
seems to be the future, so avoid regmap_reinit_cache and instead drop
the whole regcache when the vop is disabled; we then get a fresh
start at the next enable time.
Fixes: 81a06f1d02e5 ("Revert "drm/rockchip: vop2: Use regcache_sync() to fix suspend/resume"")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Closes: https://lore.kernel.org/all/98a9f15d-30ac-47bf-9b93-3aa2c9900f7b@samsung.com/
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
[dropped the large kernel log of the lockdep report from the message]
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231217084415.2373043-1-andyshrk@163.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -970,12 +970,6 @@ static void vop2_enable(struct vop2 *vop
return;
}
- ret = regmap_reinit_cache(vop2->map, &vop2_regmap_config);
- if (ret) {
- drm_err(vop2->drm, "failed to reinit cache: %d\n", ret);
- return;
- }
-
if (vop2->data->soc_id == 3566)
vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
@@ -1007,6 +1001,8 @@ static void vop2_disable(struct vop2 *vo
pm_runtime_put_sync(vop2->dev);
+ regcache_drop_region(vop2->map, 0, vop2_regmap_config.max_register);
+
clk_disable_unprepare(vop2->pclk);
clk_disable_unprepare(vop2->aclk);
clk_disable_unprepare(vop2->hclk);
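
The underlying constraint here: the VOP2 loses register state whenever its power domain goes down, so any regmap cache kept across a disable goes stale. Dropping the cached region on disable forces fresh hardware reads at the next enable without re-initializing the cache. A sketch of the idiom (regmap API from include/linux/regmap.h; error handling elided):

#include <linux/regmap.h>

static void example_disable(struct regmap *map, unsigned int max_register)
{
	/* the hardware is about to lose state: invalidate the cache */
	regcache_drop_region(map, 0, max_register);
}

regmap_reinit_cache() would achieve a similar reset, but calling it at runtime is what produced the lockdep splat with the maple-tree cache.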


@@ -1,31 +0,0 @@
From f40e61eb538d35661d6dda1de92867954d776c4a Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 19 Dec 2023 14:26:35 +0800
Subject: [PATCH] drm/rockchip: vop2: clean up some inconsistent indenting
No functional modification involved.
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c:1708 rk3588_calc_cru_cfg() warn: inconsistent indenting.
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7778
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231219062635.100718-1-jiapeng.chong@linux.alibaba.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -1712,8 +1712,8 @@ static unsigned long rk3588_calc_cru_cfg
* *if_pixclk_div = dclk_rate / if_pixclk_rate;
* *if_dclk_div = dclk_rate / if_dclk_rate;
*/
- *if_pixclk_div = 2;
- *if_dclk_div = 4;
+ *if_pixclk_div = 2;
+ *if_dclk_div = 4;
} else if (vop2_output_if_is_edp(id)) {
/*
* edp_pixclk = edp_dclk > dclk_core


@@ -1,25 +0,0 @@
From 38709af26c33e398c3292e96837ccfde41fd9e6b Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Thu, 4 Jan 2024 16:39:49 +0200
Subject: [PATCH] drm/rockchip: vop2: Drop superfluous include
The rockchip_drm_fb.h header contains just a single function which is
not directly used by the VOP2 driver. Drop the unnecessary include.
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20240104143951.85219-1-cristian.ciocaltea@collabora.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 1 -
1 file changed, 1 deletion(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -34,7 +34,6 @@
#include <dt-bindings/soc/rockchip,vop2.h>
#include "rockchip_drm_gem.h"
-#include "rockchip_drm_fb.h"
#include "rockchip_drm_vop2.h"
#include "rockchip_rgb.h"


@@ -1,47 +0,0 @@
From 196da3f3f76a46905f7daab29c56974f1aba9a7a Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Fri, 5 Jan 2024 19:40:06 +0200
Subject: [PATCH] drm/rockchip: vop2: Drop unused if_dclk_rate variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Commit 5a028e8f062f ("drm/rockchip: vop2: Add support for rk3588")
introduced a variable which ended up being unused:
rockchip_drm_vop2.c:1688:23: warning: variable if_dclk_rate set but not used [-Wunused-but-set-variable]
It was initially used as part of a formula to compute the clock
dividers, but was eventually replaced by static values.
Drop the variable declaration and move its assignment to the comment
block, to serve as documentation of how the constants have been
generated.
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20240105174007.98054-1-cristian.ciocaltea@collabora.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -1691,7 +1691,6 @@ static unsigned long rk3588_calc_cru_cfg
unsigned long dclk_core_rate = v_pixclk >> 2;
unsigned long dclk_rate = v_pixclk;
unsigned long dclk_out_rate;
- unsigned long if_dclk_rate;
unsigned long if_pixclk_rate;
int K = 1;
@@ -1706,8 +1705,8 @@ static unsigned long rk3588_calc_cru_cfg
}
if_pixclk_rate = (dclk_core_rate << 1) / K;
- if_dclk_rate = dclk_core_rate / K;
/*
+ * if_dclk_rate = dclk_core_rate / K;
* *if_pixclk_div = dclk_rate / if_pixclk_rate;
* *if_dclk_div = dclk_rate / if_dclk_rate;
*/


@@ -1,31 +0,0 @@
From b6ddaa63f728d26c12048aed76be99c24f435c41 Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Date: Fri, 19 Jan 2024 11:08:40 -0800
Subject: [PATCH] drm/rockchip: vop2: add a missing unlock in
vop2_crtc_atomic_enable()
Unlock before returning on the error path.
Fixes: 5a028e8f062f ("drm/rockchip: vop2: Add support for rk3588")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20240119190841.1619443-1-harshit.m.mogalapalli@oracle.com
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -1996,8 +1996,10 @@ static void vop2_crtc_atomic_enable(stru
clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags);
}
- if (!clock)
+ if (!clock) {
+ vop2_unlock(vop2);
return;
+ }
if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
!(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT))
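
The bug class fixed above is the asymmetric exit: a function that takes a lock at its head grows a new early return that skips the unlock. A minimal sketch of the safe shape (names hypothetical):

#include <linux/mutex.h>
#include <linux/types.h>

static bool example_setup_failed(void)
{
	return false;	/* stands in for "no usable pixel clock" */
}

static void example_atomic_enable(struct mutex *lock)
{
	mutex_lock(lock);

	if (example_setup_failed()) {
		mutex_unlock(lock);	/* every early return must unlock */
		return;
	}

	/* ... normal mode-set programming ... */
	mutex_unlock(lock);
}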


@@ -1,78 +0,0 @@
From fe7acaa727e135621c062caa2d6d3ad4ad0b0185 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 20 Sep 2023 16:42:35 +0200
Subject: [PATCH] drm/gpuvm: allow building as module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Currently, the DRM GPUVM does not have any core dependencies preventing
a module build.
Also, new features from subsequent patches require helpers (namely
drm_exec) which can be built as a module.
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230920144343.64830-3-dakr@redhat.com
---
drivers/gpu/drm/Kconfig | 7 +++++++
drivers/gpu/drm/Makefile | 2 +-
drivers/gpu/drm/drm_gpuvm.c | 3 +++
drivers/gpu/drm/nouveau/Kconfig | 1 +
4 files changed, 12 insertions(+), 1 deletion(-)
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -217,6 +217,13 @@ config DRM_EXEC
help
Execution context for command submissions
+config DRM_GPUVM
+ tristate
+ depends on DRM
+ help
+ GPU-VM representation providing helpers to manage a GPU's virtual
+ address space
+
config DRM_BUDDY
tristate
depends on DRM
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -45,7 +45,6 @@ drm-y := \
drm_vblank.o \
drm_vblank_work.o \
drm_vma_manager.o \
- drm_gpuvm.o \
drm_writeback.o
drm-$(CONFIG_DRM_LEGACY) += \
drm_agpsupport.o \
@@ -81,6 +80,7 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRK
#
#
obj-$(CONFIG_DRM_EXEC) += drm_exec.o
+obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -1721,3 +1721,6 @@ drm_gpuva_ops_free(struct drm_gpuvm *gpu
kfree(ops);
}
EXPORT_SYMBOL_GPL(drm_gpuva_ops_free);
+
+MODULE_DESCRIPTION("DRM GPUVM");
+MODULE_LICENSE("GPL");
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -11,6 +11,7 @@ config DRM_NOUVEAU
select DRM_TTM
select DRM_TTM_HELPER
select DRM_EXEC
+ select DRM_GPUVM
select DRM_SCHED
select I2C
select I2C_ALGOBIT


@@ -1,208 +0,0 @@
From 78f54469b871db5ba8ea49abd4e5994e97bd525b Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 20 Sep 2023 16:42:36 +0200
Subject: [PATCH] drm/nouveau: uvmm: rename 'umgr' to 'base'
Rename struct drm_gpuvm within struct nouveau_uvmm from 'umgr' to 'base'.
Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230920144343.64830-4-dakr@redhat.com
---
drivers/gpu/drm/nouveau/nouveau_debugfs.c | 2 +-
drivers/gpu/drm/nouveau/nouveau_exec.c | 4 +--
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 32 +++++++++++------------
drivers/gpu/drm/nouveau/nouveau_uvmm.h | 6 ++---
4 files changed, 22 insertions(+), 22 deletions(-)
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -231,7 +231,7 @@ nouveau_debugfs_gpuva(struct seq_file *m
continue;
nouveau_uvmm_lock(uvmm);
- drm_debugfs_gpuva_info(m, &uvmm->umgr);
+ drm_debugfs_gpuva_info(m, &uvmm->base);
seq_puts(m, "\n");
nouveau_debugfs_gpuva_regions(m, uvmm);
nouveau_uvmm_unlock(uvmm);
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -107,8 +107,8 @@ nouveau_exec_job_submit(struct nouveau_j
drm_exec_until_all_locked(exec) {
struct drm_gpuva *va;
- drm_gpuvm_for_each_va(va, &uvmm->umgr) {
- if (unlikely(va == &uvmm->umgr.kernel_alloc_node))
+ drm_gpuvm_for_each_va(va, &uvmm->base) {
+ if (unlikely(va == &uvmm->base.kernel_alloc_node))
continue;
ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -329,7 +329,7 @@ nouveau_uvma_region_create(struct nouvea
struct nouveau_uvma_region *reg;
int ret;
- if (!drm_gpuvm_interval_empty(&uvmm->umgr, addr, range))
+ if (!drm_gpuvm_interval_empty(&uvmm->base, addr, range))
return -ENOSPC;
ret = nouveau_uvma_region_alloc(&reg);
@@ -384,7 +384,7 @@ nouveau_uvma_region_empty(struct nouveau
{
struct nouveau_uvmm *uvmm = reg->uvmm;
- return drm_gpuvm_interval_empty(&uvmm->umgr,
+ return drm_gpuvm_interval_empty(&uvmm->base,
reg->va.addr,
reg->va.range);
}
@@ -589,7 +589,7 @@ op_map_prepare(struct nouveau_uvmm *uvmm
uvma->region = args->region;
uvma->kind = args->kind;
- drm_gpuva_map(&uvmm->umgr, &uvma->va, op);
+ drm_gpuva_map(&uvmm->base, &uvma->va, op);
/* Keep a reference until this uvma is destroyed. */
nouveau_uvma_gem_get(uvma);
@@ -1194,7 +1194,7 @@ nouveau_uvmm_bind_job_submit(struct nouv
goto unwind_continue;
}
- op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->umgr,
+ op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->base,
op->va.addr,
op->va.range);
if (IS_ERR(op->ops)) {
@@ -1205,7 +1205,7 @@ nouveau_uvmm_bind_job_submit(struct nouv
ret = nouveau_uvmm_sm_unmap_prepare(uvmm, &op->new,
op->ops);
if (ret) {
- drm_gpuva_ops_free(&uvmm->umgr, op->ops);
+ drm_gpuva_ops_free(&uvmm->base, op->ops);
op->ops = NULL;
op->reg = NULL;
goto unwind_continue;
@@ -1240,7 +1240,7 @@ nouveau_uvmm_bind_job_submit(struct nouv
}
}
- op->ops = drm_gpuvm_sm_map_ops_create(&uvmm->umgr,
+ op->ops = drm_gpuvm_sm_map_ops_create(&uvmm->base,
op->va.addr,
op->va.range,
op->gem.obj,
@@ -1256,7 +1256,7 @@ nouveau_uvmm_bind_job_submit(struct nouv
op->va.range,
op->flags & 0xff);
if (ret) {
- drm_gpuva_ops_free(&uvmm->umgr, op->ops);
+ drm_gpuva_ops_free(&uvmm->base, op->ops);
op->ops = NULL;
goto unwind_continue;
}
@@ -1264,7 +1264,7 @@ nouveau_uvmm_bind_job_submit(struct nouv
break;
}
case OP_UNMAP:
- op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->umgr,
+ op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->base,
op->va.addr,
op->va.range);
if (IS_ERR(op->ops)) {
@@ -1275,7 +1275,7 @@ nouveau_uvmm_bind_job_submit(struct nouv
ret = nouveau_uvmm_sm_unmap_prepare(uvmm, &op->new,
op->ops);
if (ret) {
- drm_gpuva_ops_free(&uvmm->umgr, op->ops);
+ drm_gpuva_ops_free(&uvmm->base, op->ops);
op->ops = NULL;
goto unwind_continue;
}
@@ -1406,7 +1406,7 @@ unwind:
break;
}
- drm_gpuva_ops_free(&uvmm->umgr, op->ops);
+ drm_gpuva_ops_free(&uvmm->base, op->ops);
op->ops = NULL;
op->reg = NULL;
}
@@ -1511,7 +1511,7 @@ nouveau_uvmm_bind_job_free_work_fn(struc
}
if (!IS_ERR_OR_NULL(op->ops))
- drm_gpuva_ops_free(&uvmm->umgr, op->ops);
+ drm_gpuva_ops_free(&uvmm->base, op->ops);
if (obj)
drm_gem_object_put(obj);
@@ -1838,7 +1838,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u
uvmm->kernel_managed_addr = kernel_managed_addr;
uvmm->kernel_managed_size = kernel_managed_size;
- drm_gpuvm_init(&uvmm->umgr, cli->name,
+ drm_gpuvm_init(&uvmm->base, cli->name,
NOUVEAU_VA_SPACE_START,
NOUVEAU_VA_SPACE_END,
kernel_managed_addr, kernel_managed_size,
@@ -1857,7 +1857,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u
return 0;
out_free_gpuva_mgr:
- drm_gpuvm_destroy(&uvmm->umgr);
+ drm_gpuvm_destroy(&uvmm->base);
out_unlock:
mutex_unlock(&cli->mutex);
return ret;
@@ -1879,11 +1879,11 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u
wait_event(entity->job.wq, list_empty(&entity->job.list.head));
nouveau_uvmm_lock(uvmm);
- drm_gpuvm_for_each_va_safe(va, next, &uvmm->umgr) {
+ drm_gpuvm_for_each_va_safe(va, next, &uvmm->base) {
struct nouveau_uvma *uvma = uvma_from_va(va);
struct drm_gem_object *obj = va->gem.obj;
- if (unlikely(va == &uvmm->umgr.kernel_alloc_node))
+ if (unlikely(va == &uvmm->base.kernel_alloc_node))
continue;
drm_gpuva_remove(va);
@@ -1912,7 +1912,7 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u
mutex_lock(&cli->mutex);
nouveau_vmm_fini(&uvmm->vmm);
- drm_gpuvm_destroy(&uvmm->umgr);
+ drm_gpuvm_destroy(&uvmm->base);
mutex_unlock(&cli->mutex);
dma_resv_fini(&uvmm->resv);
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
@@ -8,8 +8,8 @@
#include "nouveau_drv.h"
struct nouveau_uvmm {
+ struct drm_gpuvm base;
struct nouveau_vmm vmm;
- struct drm_gpuvm umgr;
struct maple_tree region_mt;
struct mutex mutex;
struct dma_resv resv;
@@ -41,10 +41,10 @@ struct nouveau_uvma {
u8 kind;
};
-#define uvmm_from_mgr(x) container_of((x), struct nouveau_uvmm, umgr)
+#define uvmm_from_gpuvm(x) container_of((x), struct nouveau_uvmm, base)
#define uvma_from_va(x) container_of((x), struct nouveau_uvma, va)
-#define to_uvmm(x) uvmm_from_mgr((x)->va.vm)
+#define to_uvmm(x) uvmm_from_gpuvm((x)->va.vm)
struct nouveau_uvmm_bind_job {
struct nouveau_job base;
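
Calling the embedded struct drm_gpuvm 'base' follows the usual kernel subclassing idiom: the generic object is a member of the driver object, and container_of() recovers the wrapper from a pointer to the member. A self-contained sketch of what uvmm_from_gpuvm() boils down to (userspace approximation of container_of(); field names illustrative):

#include <stddef.h>

struct drm_gpuvm { int placeholder; };

struct nouveau_uvmm {
	struct drm_gpuvm base;	/* generic part, embedded */
	int private_state;	/* driver-specific part */
};

#define uvmm_from_gpuvm(x) \
	((struct nouveau_uvmm *)((char *)(x) - offsetof(struct nouveau_uvmm, base)))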


@@ -1,45 +0,0 @@
From f7749a549b4f4db0c02e6b3d3800ea400dd76c12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Tue, 10 Oct 2023 16:27:24 +0200
Subject: [PATCH] drm/gpuvm: Dual-licence the drm_gpuvm code GPL-2.0 OR MIT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Dual-licence in order to make it possible for other non-GPL OSes
to re-implement the code. The use of EXPORT_SYMBOL_GPL() is intentionally
left untouched to prevent use of drm_gpuvm as a proxy for non-GPL drivers
to access GPL-only kernel symbols.
Many of the ideas and algorithms used in the drm_gpuvm code are already
present in one way or another in MIT-licensed code.
Cc: Danilo Krummrich <dakr@redhat.com>
Cc: airlied@gmail.com
Cc: daniel@ffwll.ch
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Acked-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Francois Dugast <francois.dugast@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231010142725.8920-1-thomas.hellstrom@linux.intel.com
---
drivers/gpu/drm/drm_gpuvm.c | 2 +-
include/drm/drm_gpuvm.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (c) 2022 Red Hat.
*
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
#ifndef __DRM_GPUVM_H__
#define __DRM_GPUVM_H__


@@ -1,165 +0,0 @@
From 546ca4d35dccaca6613766ed36ccfb2b5bd63bfe Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:31 +0100
Subject: [PATCH] drm/gpuvm: convert WARN() to drm_WARN() variants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Use the drm_WARN() and drm_WARN_ON() variants to indicate to drivers the
context the failing VM resides in.
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-2-dakr@redhat.com
---
drivers/gpu/drm/drm_gpuvm.c | 32 ++++++++++++++------------
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 3 ++-
include/drm/drm_gpuvm.h | 7 ++++++
3 files changed, 26 insertions(+), 16 deletions(-)
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -614,12 +614,12 @@ static int __drm_gpuva_insert(struct drm
static void __drm_gpuva_remove(struct drm_gpuva *va);
static bool
-drm_gpuvm_check_overflow(u64 addr, u64 range)
+drm_gpuvm_check_overflow(struct drm_gpuvm *gpuvm, u64 addr, u64 range)
{
u64 end;
- return WARN(check_add_overflow(addr, range, &end),
- "GPUVA address limited to %zu bytes.\n", sizeof(end));
+ return drm_WARN(gpuvm->drm, check_add_overflow(addr, range, &end),
+ "GPUVA address limited to %zu bytes.\n", sizeof(end));
}
static bool
@@ -647,7 +647,7 @@ static bool
drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm,
u64 addr, u64 range)
{
- return !drm_gpuvm_check_overflow(addr, range) &&
+ return !drm_gpuvm_check_overflow(gpuvm, addr, range) &&
drm_gpuvm_in_mm_range(gpuvm, addr, range) &&
!drm_gpuvm_in_kernel_node(gpuvm, addr, range);
}
@@ -656,6 +656,7 @@ drm_gpuvm_range_valid(struct drm_gpuvm *
* drm_gpuvm_init() - initialize a &drm_gpuvm
* @gpuvm: pointer to the &drm_gpuvm to initialize
* @name: the name of the GPU VA space
+ * @drm: the &drm_device this VM resides in
* @start_offset: the start offset of the GPU VA space
* @range: the size of the GPU VA space
* @reserve_offset: the start of the kernel reserved GPU VA area
@@ -668,8 +669,8 @@ drm_gpuvm_range_valid(struct drm_gpuvm *
* &name is expected to be managed by the surrounding driver structures.
*/
void
-drm_gpuvm_init(struct drm_gpuvm *gpuvm,
- const char *name,
+drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
+ struct drm_device *drm,
u64 start_offset, u64 range,
u64 reserve_offset, u64 reserve_range,
const struct drm_gpuvm_ops *ops)
@@ -677,20 +678,20 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm,
gpuvm->rb.tree = RB_ROOT_CACHED;
INIT_LIST_HEAD(&gpuvm->rb.list);
- drm_gpuvm_check_overflow(start_offset, range);
- gpuvm->mm_start = start_offset;
- gpuvm->mm_range = range;
-
gpuvm->name = name ? name : "unknown";
gpuvm->ops = ops;
+ gpuvm->drm = drm;
- memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva));
+ drm_gpuvm_check_overflow(gpuvm, start_offset, range);
+ gpuvm->mm_start = start_offset;
+ gpuvm->mm_range = range;
+ memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva));
if (reserve_range) {
gpuvm->kernel_alloc_node.va.addr = reserve_offset;
gpuvm->kernel_alloc_node.va.range = reserve_range;
- if (likely(!drm_gpuvm_check_overflow(reserve_offset,
+ if (likely(!drm_gpuvm_check_overflow(gpuvm, reserve_offset,
reserve_range)))
__drm_gpuva_insert(gpuvm, &gpuvm->kernel_alloc_node);
}
@@ -712,8 +713,8 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuv
if (gpuvm->kernel_alloc_node.va.range)
__drm_gpuva_remove(&gpuvm->kernel_alloc_node);
- WARN(!RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root),
- "GPUVA tree is not empty, potentially leaking memory.");
+ drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root),
+ "GPUVA tree is not empty, potentially leaking memory.\n");
}
EXPORT_SYMBOL_GPL(drm_gpuvm_destroy);
@@ -795,7 +796,8 @@ drm_gpuva_remove(struct drm_gpuva *va)
struct drm_gpuvm *gpuvm = va->vm;
if (unlikely(va == &gpuvm->kernel_alloc_node)) {
- WARN(1, "Can't destroy kernel reserved node.\n");
+ drm_WARN(gpuvm->drm, 1,
+ "Can't destroy kernel reserved node.\n");
return;
}
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1810,6 +1810,7 @@ int
nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
u64 kernel_managed_addr, u64 kernel_managed_size)
{
+ struct drm_device *drm = cli->drm->dev;
int ret;
u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size;
@@ -1838,7 +1839,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u
uvmm->kernel_managed_addr = kernel_managed_addr;
uvmm->kernel_managed_size = kernel_managed_size;
- drm_gpuvm_init(&uvmm->base, cli->name,
+ drm_gpuvm_init(&uvmm->base, cli->name, drm,
NOUVEAU_VA_SPACE_START,
NOUVEAU_VA_SPACE_END,
kernel_managed_addr, kernel_managed_size,
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -29,6 +29,7 @@
#include <linux/rbtree.h>
#include <linux/types.h>
+#include <drm/drm_device.h>
#include <drm/drm_gem.h>
struct drm_gpuvm;
@@ -202,6 +203,11 @@ struct drm_gpuvm {
const char *name;
/**
+ * @drm: the &drm_device this VM lives in
+ */
+ struct drm_device *drm;
+
+ /**
* @mm_start: start of the VA space
*/
u64 mm_start;
@@ -241,6 +247,7 @@ struct drm_gpuvm {
};
void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
+ struct drm_device *drm,
u64 start_offset, u64 range,
u64 reserve_offset, u64 reserve_range,
const struct drm_gpuvm_ops *ops);
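
A minimal sketch of what the conversion buys, assuming a &drm_gpuvm whose
drm pointer was set up via the new drm_gpuvm_init() parameter above:

#include <drm/drm_gpuvm.h>
#include <drm/drm_print.h>

static void check_va_tree_empty(struct drm_gpuvm *vm)
{
	/* Plain WARN() cannot tell which device the VM belongs to. */
	WARN(!RB_EMPTY_ROOT(&vm->rb.tree.rb_root),
	     "GPUVA tree is not empty\n");

	/* drm_WARN() prefixes the message with the drm_device identifier,
	 * which disambiguates the log on multi-GPU systems. */
	drm_WARN(vm->drm, !RB_EMPTY_ROOT(&vm->rb.tree.rb_root),
		 "GPUVA tree is not empty\n");
}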

View File

@ -1,61 +0,0 @@
From 9297cfc9405bc6b60540b8b8aaf930b7e449e15a Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:33 +0100
Subject: [PATCH] drm/gpuvm: export drm_gpuvm_range_valid()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Drivers may use this function to validate userspace requests in advance,
hence export it.
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-4-dakr@redhat.com
---
drivers/gpu/drm/drm_gpuvm.c | 14 +++++++++++++-
include/drm/drm_gpuvm.h | 1 +
2 files changed, 14 insertions(+), 1 deletion(-)
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -643,7 +643,18 @@ drm_gpuvm_in_kernel_node(struct drm_gpuv
return krange && addr < kend && kstart < end;
}
-static bool
+/**
+ * drm_gpuvm_range_valid() - checks whether the given range is valid for the
+ * given &drm_gpuvm
+ * @gpuvm: the GPUVM to check the range for
+ * @addr: the base address
+ * @range: the range starting from the base address
+ *
+ * Checks whether the range is within the GPUVM's managed boundaries.
+ *
+ * Returns: true for a valid range, false otherwise
+ */
+bool
drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm,
u64 addr, u64 range)
{
@@ -651,6 +662,7 @@ drm_gpuvm_range_valid(struct drm_gpuvm *
drm_gpuvm_in_mm_range(gpuvm, addr, range) &&
!drm_gpuvm_in_kernel_node(gpuvm, addr, range);
}
+EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid);
/**
* drm_gpuvm_init() - initialize a &drm_gpuvm
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -253,6 +253,7 @@ void drm_gpuvm_init(struct drm_gpuvm *gp
const struct drm_gpuvm_ops *ops);
void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm);
+bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
static inline struct drm_gpuva *
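
With the helper exported, a driver can reject invalid requests before
building any bind operations. A minimal sketch of such a check (the
function name is hypothetical; the PAGE_MASK handling mirrors the nouveau
conversion in the follow-up patch):

static int my_validate_bind_range(struct drm_gpuvm *gpuvm, u64 addr, u64 range)
{
	if ((addr | range) & ~PAGE_MASK)
		return -EINVAL;		/* not page-aligned */

	if (!drm_gpuvm_range_valid(gpuvm, addr, range))
		return -EINVAL;		/* overflow, out of bounds, or kernel node */

	return 0;
}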

View File

@ -1,66 +0,0 @@
From b41e297abd2347075ec640daf0e5da576e3d7418 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:34 +0100
Subject: [PATCH] drm/nouveau: make use of drm_gpuvm_range_valid()
Use drm_gpuvm_range_valid() in order to validate userspace requests.
Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-5-dakr@redhat.com
---
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 17 +----------------
drivers/gpu/drm/nouveau/nouveau_uvmm.h | 3 ---
2 files changed, 1 insertion(+), 19 deletions(-)
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -929,25 +929,13 @@ nouveau_uvmm_sm_unmap_cleanup(struct nou
static int
nouveau_uvmm_validate_range(struct nouveau_uvmm *uvmm, u64 addr, u64 range)
{
- u64 end = addr + range;
- u64 kernel_managed_end = uvmm->kernel_managed_addr +
- uvmm->kernel_managed_size;
-
if (addr & ~PAGE_MASK)
return -EINVAL;
if (range & ~PAGE_MASK)
return -EINVAL;
- if (end <= addr)
- return -EINVAL;
-
- if (addr < NOUVEAU_VA_SPACE_START ||
- end > NOUVEAU_VA_SPACE_END)
- return -EINVAL;
-
- if (addr < kernel_managed_end &&
- end > uvmm->kernel_managed_addr)
+ if (!drm_gpuvm_range_valid(&uvmm->base, addr, range))
return -EINVAL;
return 0;
@@ -1836,9 +1824,6 @@ nouveau_uvmm_init(struct nouveau_uvmm *u
goto out_unlock;
}
- uvmm->kernel_managed_addr = kernel_managed_addr;
- uvmm->kernel_managed_size = kernel_managed_size;
-
drm_gpuvm_init(&uvmm->base, cli->name, drm,
NOUVEAU_VA_SPACE_START,
NOUVEAU_VA_SPACE_END,
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
@@ -14,9 +14,6 @@ struct nouveau_uvmm {
struct mutex mutex;
struct dma_resv resv;
- u64 kernel_managed_addr;
- u64 kernel_managed_size;
-
bool disabled;
};

View File

@ -1,205 +0,0 @@
From bbe8458037e74b9887ba2f0f0b8084a13ade3a90 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:35 +0100
Subject: [PATCH] drm/gpuvm: add common dma-resv per struct drm_gpuvm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Provide a common dma-resv for GEM objects not being used outside of this
GPU-VM. This is used in a subsequent patch to generalize dma-resv,
external and evicted object handling and GEM validation.
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-6-dakr@redhat.com
---
drivers/gpu/drm/drm_gpuvm.c | 53 ++++++++++++++++++++++++++
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 13 ++++++-
include/drm/drm_gpuvm.h | 33 ++++++++++++++++
3 files changed, 97 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -61,6 +61,15 @@
* contained within struct drm_gpuva already. Hence, for inserting &drm_gpuva
* entries from within dma-fence signalling critical sections it is enough to
* pre-allocate the &drm_gpuva structures.
+ *
+ * &drm_gem_objects which are private to a single VM can share a common
+ * &dma_resv in order to improve locking efficiency (e.g. with &drm_exec).
+ * For this purpose drivers must pass a &drm_gem_object to drm_gpuvm_init(), in
+ * the following called 'resv object', which serves as the container of the
+ * GPUVM's shared &dma_resv. This resv object can be a driver specific
+ * &drm_gem_object, such as the &drm_gem_object containing the root page table,
+ * but it can also be a 'dummy' object, which can be allocated with
+ * drm_gpuvm_resv_object_alloc().
*/
/**
@@ -664,11 +673,49 @@ drm_gpuvm_range_valid(struct drm_gpuvm *
}
EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid);
+static void
+drm_gpuvm_gem_object_free(struct drm_gem_object *obj)
+{
+ drm_gem_object_release(obj);
+ kfree(obj);
+}
+
+static const struct drm_gem_object_funcs drm_gpuvm_object_funcs = {
+ .free = drm_gpuvm_gem_object_free,
+};
+
+/**
+ * drm_gpuvm_resv_object_alloc() - allocate a dummy &drm_gem_object
+ * @drm: the drivers &drm_device
+ *
+ * Allocates a dummy &drm_gem_object which can be passed to drm_gpuvm_init() in
+ * order to serve as root GEM object providing the &drm_resv shared across
+ * &drm_gem_objects local to a single GPUVM.
+ *
+ * Returns: the &drm_gem_object on success, NULL on failure
+ */
+struct drm_gem_object *
+drm_gpuvm_resv_object_alloc(struct drm_device *drm)
+{
+ struct drm_gem_object *obj;
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return NULL;
+
+ obj->funcs = &drm_gpuvm_object_funcs;
+ drm_gem_private_object_init(drm, obj, 0);
+
+ return obj;
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_alloc);
+
/**
* drm_gpuvm_init() - initialize a &drm_gpuvm
* @gpuvm: pointer to the &drm_gpuvm to initialize
* @name: the name of the GPU VA space
* @drm: the &drm_device this VM resides in
+ * @r_obj: the resv &drm_gem_object providing the GPUVM's common &dma_resv
* @start_offset: the start offset of the GPU VA space
* @range: the size of the GPU VA space
* @reserve_offset: the start of the kernel reserved GPU VA area
@@ -683,6 +730,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid)
void
drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
struct drm_device *drm,
+ struct drm_gem_object *r_obj,
u64 start_offset, u64 range,
u64 reserve_offset, u64 reserve_range,
const struct drm_gpuvm_ops *ops)
@@ -693,6 +741,9 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm,
gpuvm->name = name ? name : "unknown";
gpuvm->ops = ops;
gpuvm->drm = drm;
+ gpuvm->r_obj = r_obj;
+
+ drm_gem_object_get(r_obj);
drm_gpuvm_check_overflow(gpuvm, start_offset, range);
gpuvm->mm_start = start_offset;
@@ -727,6 +778,8 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuv
drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root),
"GPUVA tree is not empty, potentially leaking memory.\n");
+
+ drm_gem_object_put(gpuvm->r_obj);
}
EXPORT_SYMBOL_GPL(drm_gpuvm_destroy);
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1799,8 +1799,9 @@ nouveau_uvmm_init(struct nouveau_uvmm *u
u64 kernel_managed_addr, u64 kernel_managed_size)
{
struct drm_device *drm = cli->drm->dev;
- int ret;
+ struct drm_gem_object *r_obj;
u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size;
+ int ret;
mutex_init(&uvmm->mutex);
dma_resv_init(&uvmm->resv);
@@ -1824,11 +1825,19 @@ nouveau_uvmm_init(struct nouveau_uvmm *u
goto out_unlock;
}
- drm_gpuvm_init(&uvmm->base, cli->name, drm,
+ r_obj = drm_gpuvm_resv_object_alloc(drm);
+ if (!r_obj) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ drm_gpuvm_init(&uvmm->base, cli->name, drm, r_obj,
NOUVEAU_VA_SPACE_START,
NOUVEAU_VA_SPACE_END,
kernel_managed_addr, kernel_managed_size,
NULL);
+ /* GPUVM takes care from here on. */
+ drm_gem_object_put(r_obj);
ret = nvif_vmm_ctor(&cli->mmu, "uvmm",
cli->vmm.vmm.object.oclass, RAW,
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -244,10 +244,16 @@ struct drm_gpuvm {
* @ops: &drm_gpuvm_ops providing the split/merge steps to drivers
*/
const struct drm_gpuvm_ops *ops;
+
+ /**
+ * @r_obj: Resv GEM object; representing the GPUVM's common &dma_resv.
+ */
+ struct drm_gem_object *r_obj;
};
void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
struct drm_device *drm,
+ struct drm_gem_object *r_obj,
u64 start_offset, u64 range,
u64 reserve_offset, u64 reserve_range,
const struct drm_gpuvm_ops *ops);
@@ -256,6 +262,33 @@ void drm_gpuvm_destroy(struct drm_gpuvm
bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
+struct drm_gem_object *
+drm_gpuvm_resv_object_alloc(struct drm_device *drm);
+
+/**
+ * drm_gpuvm_resv() - returns the &drm_gpuvm's &dma_resv
+ * @gpuvm__: the &drm_gpuvm
+ *
+ * Returns: a pointer to the &drm_gpuvm's shared &dma_resv
+ */
+#define drm_gpuvm_resv(gpuvm__) ((gpuvm__)->r_obj->resv)
+
+/**
+ * drm_gpuvm_resv_obj() - returns the &drm_gem_object holding the &drm_gpuvm's
+ * &dma_resv
+ * @gpuvm__: the &drm_gpuvm
+ *
+ * Returns: a pointer to the &drm_gem_object holding the &drm_gpuvm's shared
+ * &dma_resv
+ */
+#define drm_gpuvm_resv_obj(gpuvm__) ((gpuvm__)->r_obj)
+
+#define drm_gpuvm_resv_held(gpuvm__) \
+ dma_resv_held(drm_gpuvm_resv(gpuvm__))
+
+#define drm_gpuvm_resv_assert_held(gpuvm__) \
+ dma_resv_assert_held(drm_gpuvm_resv(gpuvm__))
+
static inline struct drm_gpuva *
__drm_gpuva_next(struct drm_gpuva *va)
{
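
Putting the pieces together, a minimal init sketch under the signature as
of this patch (a later patch in the series inserts a flags argument;
VA_START, VA_RANGE, KERNEL_VA_START, KERNEL_VA_RANGE and example_ops are
hypothetical placeholders):

	struct drm_gem_object *r_obj;

	r_obj = drm_gpuvm_resv_object_alloc(drm);	/* dummy resv container */
	if (!r_obj)
		return -ENOMEM;

	drm_gpuvm_init(gpuvm, "example", drm, r_obj,
		       VA_START, VA_RANGE,
		       KERNEL_VA_START, KERNEL_VA_RANGE,
		       &example_ops);

	/* drm_gpuvm_init() took its own reference on r_obj. */
	drm_gem_object_put(r_obj);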

View File

@ -1,140 +0,0 @@
From 6118411428a393fb0868bad9025d71875418058b Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:36 +0100
Subject: [PATCH] drm/nouveau: make use of the GPUVM's shared dma-resv
DRM GEM objects private to a single GPUVM can use a shared dma-resv.
Make use of the shared dma-resv of GPUVM rather than a driver specific
one.
The shared dma-resv originates from a "root" GEM object serving as
container for the dma-resv to make it compatible with drm_exec.
In order to make sure the object providing the shared dma-resv can't be
freed up before the objects making use of it, let every such GEM object
take a reference on it.
Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-7-dakr@redhat.com
---
drivers/gpu/drm/nouveau/nouveau_bo.c | 11 +++++++++--
drivers/gpu/drm/nouveau/nouveau_bo.h | 5 +++++
drivers/gpu/drm/nouveau/nouveau_gem.c | 10 ++++++++--
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 7 ++-----
drivers/gpu/drm/nouveau/nouveau_uvmm.h | 1 -
5 files changed, 24 insertions(+), 10 deletions(-)
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -148,10 +148,17 @@ nouveau_bo_del_ttm(struct ttm_buffer_obj
* If nouveau_bo_new() allocated this buffer, the GEM object was never
* initialized, so don't attempt to release it.
*/
- if (bo->base.dev)
+ if (bo->base.dev) {
+ /* Gem objects not being shared with other VMs get their
+ * dma_resv from a root GEM object.
+ */
+ if (nvbo->no_share)
+ drm_gem_object_put(nvbo->r_obj);
+
drm_gem_object_release(&bo->base);
- else
+ } else {
dma_resv_fini(&bo->base._resv);
+ }
kfree(nvbo);
}
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -26,6 +26,11 @@ struct nouveau_bo {
struct list_head entry;
int pbbo_index;
bool validate_mapped;
+
+ /* Root GEM object we derive the dma_resv of in case this BO is not
+ * shared between VMs.
+ */
+ struct drm_gem_object *r_obj;
bool no_share;
/* GPU address space is independent of CPU word size */
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -111,7 +111,8 @@ nouveau_gem_object_open(struct drm_gem_o
if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50)
return 0;
- if (nvbo->no_share && uvmm && &uvmm->resv != nvbo->bo.base.resv)
+ if (nvbo->no_share && uvmm &&
+ drm_gpuvm_resv(&uvmm->base) != nvbo->bo.base.resv)
return -EPERM;
ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
@@ -245,7 +246,7 @@ nouveau_gem_new(struct nouveau_cli *cli,
if (unlikely(!uvmm))
return -EINVAL;
- resv = &uvmm->resv;
+ resv = drm_gpuvm_resv(&uvmm->base);
}
if (!(domain & (NOUVEAU_GEM_DOMAIN_VRAM | NOUVEAU_GEM_DOMAIN_GART)))
@@ -288,6 +289,11 @@ nouveau_gem_new(struct nouveau_cli *cli,
if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
nvbo->valid_domains &= domain;
+ if (nvbo->no_share) {
+ nvbo->r_obj = drm_gpuvm_resv_obj(&uvmm->base);
+ drm_gem_object_get(nvbo->r_obj);
+ }
+
*pnvbo = nvbo;
return 0;
}
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1804,7 +1804,6 @@ nouveau_uvmm_init(struct nouveau_uvmm *u
int ret;
mutex_init(&uvmm->mutex);
- dma_resv_init(&uvmm->resv);
mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN);
mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex);
@@ -1844,14 +1843,14 @@ nouveau_uvmm_init(struct nouveau_uvmm *u
kernel_managed_addr, kernel_managed_size,
NULL, 0, &cli->uvmm.vmm.vmm);
if (ret)
- goto out_free_gpuva_mgr;
+ goto out_gpuvm_fini;
cli->uvmm.vmm.cli = cli;
mutex_unlock(&cli->mutex);
return 0;
-out_free_gpuva_mgr:
+out_gpuvm_fini:
drm_gpuvm_destroy(&uvmm->base);
out_unlock:
mutex_unlock(&cli->mutex);
@@ -1909,6 +1908,4 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u
nouveau_vmm_fini(&uvmm->vmm);
drm_gpuvm_destroy(&uvmm->base);
mutex_unlock(&cli->mutex);
-
- dma_resv_fini(&uvmm->resv);
}
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
@@ -12,7 +12,6 @@ struct nouveau_uvmm {
struct nouveau_vmm vmm;
struct maple_tree region_mt;
struct mutex mutex;
- struct dma_resv resv;
bool disabled;
};

View File

@ -1,98 +0,0 @@
From 809ef191ee600e8bcbe2f8a769e00d2d54c16094 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:37 +0100
Subject: [PATCH] drm/gpuvm: add drm_gpuvm_flags to drm_gpuvm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Introduce flags for struct drm_gpuvm; this is required by subsequent
commits.
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-8-dakr@redhat.com
---
drivers/gpu/drm/drm_gpuvm.c | 3 +++
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +-
include/drm/drm_gpuvm.h | 16 ++++++++++++++++
3 files changed, 20 insertions(+), 1 deletion(-)
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -714,6 +714,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_
* drm_gpuvm_init() - initialize a &drm_gpuvm
* @gpuvm: pointer to the &drm_gpuvm to initialize
* @name: the name of the GPU VA space
+ * @flags: the &drm_gpuvm_flags for this GPUVM
* @drm: the &drm_device this VM resides in
* @r_obj: the resv &drm_gem_object providing the GPUVM's common &dma_resv
* @start_offset: the start offset of the GPU VA space
@@ -729,6 +730,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_
*/
void
drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
+ enum drm_gpuvm_flags flags,
struct drm_device *drm,
struct drm_gem_object *r_obj,
u64 start_offset, u64 range,
@@ -739,6 +741,7 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm,
INIT_LIST_HEAD(&gpuvm->rb.list);
gpuvm->name = name ? name : "unknown";
+ gpuvm->flags = flags;
gpuvm->ops = ops;
gpuvm->drm = drm;
gpuvm->r_obj = r_obj;
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1830,7 +1830,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u
goto out_unlock;
}
- drm_gpuvm_init(&uvmm->base, cli->name, drm, r_obj,
+ drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj,
NOUVEAU_VA_SPACE_START,
NOUVEAU_VA_SPACE_END,
kernel_managed_addr, kernel_managed_size,
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -185,6 +185,16 @@ static inline bool drm_gpuva_invalidated
}
/**
+ * enum drm_gpuvm_flags - flags for struct drm_gpuvm
+ */
+enum drm_gpuvm_flags {
+ /**
+ * @DRM_GPUVM_USERBITS: user defined bits
+ */
+ DRM_GPUVM_USERBITS = BIT(0),
+};
+
+/**
* struct drm_gpuvm - DRM GPU VA Manager
*
* The DRM GPU VA Manager keeps track of a GPU's virtual address space by using
@@ -203,6 +213,11 @@ struct drm_gpuvm {
const char *name;
/**
+ * @flags: the &drm_gpuvm_flags of this GPUVM
+ */
+ enum drm_gpuvm_flags flags;
+
+ /**
* @drm: the &drm_device this VM lives in
*/
struct drm_device *drm;
@@ -252,6 +267,7 @@ struct drm_gpuvm {
};
void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
+ enum drm_gpuvm_flags flags,
struct drm_device *drm,
struct drm_gem_object *r_obj,
u64 start_offset, u64 range,
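
Since DRM_GPUVM_USERBITS marks the first bit not claimed by core flags, a
driver can stack its own VM flags on top. A hypothetical sketch (the names
are invented):

enum my_vm_flags {
	/* first driver-defined bit */
	MY_VM_COHERENT = DRM_GPUVM_USERBITS,
	/* next driver-defined bit */
	MY_VM_LOW_PRIO = DRM_GPUVM_USERBITS << 1,
};

Such values would be passed through the new flags parameter of
drm_gpuvm_init() and read back from gpuvm->flags.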

View File

@ -1,219 +0,0 @@
From 266f7618e761c8a6aa89dbfe43cda1b69cdbbf14 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:38 +0100
Subject: [PATCH] drm/nouveau: separately allocate struct nouveau_uvmm
Allocate struct nouveau_uvmm separately in preparation for subsequent
commits introducing reference counting for struct drm_gpuvm.
While at it, get rid of nouveau_uvmm_init() as an indirection of
nouveau_uvmm_ioctl_vm_init() and perform some minor cleanups.
Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-9-dakr@redhat.com
---
drivers/gpu/drm/nouveau/nouveau_drm.c | 5 +-
drivers/gpu/drm/nouveau/nouveau_drv.h | 10 ++--
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 63 +++++++++++++-------------
drivers/gpu/drm/nouveau/nouveau_uvmm.h | 4 --
4 files changed, 40 insertions(+), 42 deletions(-)
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -190,6 +190,8 @@ nouveau_cli_work_queue(struct nouveau_cl
static void
nouveau_cli_fini(struct nouveau_cli *cli)
{
+ struct nouveau_uvmm *uvmm = nouveau_cli_uvmm_locked(cli);
+
/* All our channels are dead now, which means all the fences they
* own are signalled, and all callback functions have been called.
*
@@ -199,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli
WARN_ON(!list_empty(&cli->worker));
usif_client_fini(cli);
- nouveau_uvmm_fini(&cli->uvmm);
+ if (uvmm)
+ nouveau_uvmm_fini(uvmm);
nouveau_sched_entity_fini(&cli->sched_entity);
nouveau_vmm_fini(&cli->svm);
nouveau_vmm_fini(&cli->vmm);
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -93,7 +93,10 @@ struct nouveau_cli {
struct nvif_mmu mmu;
struct nouveau_vmm vmm;
struct nouveau_vmm svm;
- struct nouveau_uvmm uvmm;
+ struct {
+ struct nouveau_uvmm *ptr;
+ bool disabled;
+ } uvmm;
struct nouveau_sched_entity sched_entity;
@@ -121,10 +124,7 @@ struct nouveau_cli_work {
static inline struct nouveau_uvmm *
nouveau_cli_uvmm(struct nouveau_cli *cli)
{
- if (!cli || !cli->uvmm.vmm.cli)
- return NULL;
-
- return &cli->uvmm;
+ return cli ? cli->uvmm.ptr : NULL;
}
static inline struct nouveau_uvmm *
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1638,18 +1638,6 @@ err_free:
return ret;
}
-int
-nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
- void *data,
- struct drm_file *file_priv)
-{
- struct nouveau_cli *cli = nouveau_cli(file_priv);
- struct drm_nouveau_vm_init *init = data;
-
- return nouveau_uvmm_init(&cli->uvmm, cli, init->kernel_managed_addr,
- init->kernel_managed_size);
-}
-
static int
nouveau_uvmm_vm_bind(struct nouveau_uvmm_bind_job_args *args)
{
@@ -1795,17 +1783,25 @@ nouveau_uvmm_bo_unmap_all(struct nouveau
}
int
-nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
- u64 kernel_managed_addr, u64 kernel_managed_size)
+nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
+ void *data,
+ struct drm_file *file_priv)
{
+ struct nouveau_uvmm *uvmm;
+ struct nouveau_cli *cli = nouveau_cli(file_priv);
struct drm_device *drm = cli->drm->dev;
struct drm_gem_object *r_obj;
- u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size;
+ struct drm_nouveau_vm_init *init = data;
+ u64 kernel_managed_end;
int ret;
- mutex_init(&uvmm->mutex);
- mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN);
- mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex);
+ if (check_add_overflow(init->kernel_managed_addr,
+ init->kernel_managed_size,
+ &kernel_managed_end))
+ return -EINVAL;
+
+ if (kernel_managed_end > NOUVEAU_VA_SPACE_END)
+ return -EINVAL;
mutex_lock(&cli->mutex);
@@ -1814,44 +1810,49 @@ nouveau_uvmm_init(struct nouveau_uvmm *u
goto out_unlock;
}
- if (kernel_managed_end <= kernel_managed_addr) {
- ret = -EINVAL;
- goto out_unlock;
- }
-
- if (kernel_managed_end > NOUVEAU_VA_SPACE_END) {
- ret = -EINVAL;
+ uvmm = kzalloc(sizeof(*uvmm), GFP_KERNEL);
+ if (!uvmm) {
+ ret = -ENOMEM;
goto out_unlock;
}
r_obj = drm_gpuvm_resv_object_alloc(drm);
if (!r_obj) {
+ kfree(uvmm);
ret = -ENOMEM;
goto out_unlock;
}
+ mutex_init(&uvmm->mutex);
+ mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN);
+ mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex);
+
drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj,
NOUVEAU_VA_SPACE_START,
NOUVEAU_VA_SPACE_END,
- kernel_managed_addr, kernel_managed_size,
+ init->kernel_managed_addr,
+ init->kernel_managed_size,
NULL);
/* GPUVM takes care from here on. */
drm_gem_object_put(r_obj);
ret = nvif_vmm_ctor(&cli->mmu, "uvmm",
cli->vmm.vmm.object.oclass, RAW,
- kernel_managed_addr, kernel_managed_size,
- NULL, 0, &cli->uvmm.vmm.vmm);
+ init->kernel_managed_addr,
+ init->kernel_managed_size,
+ NULL, 0, &uvmm->vmm.vmm);
if (ret)
goto out_gpuvm_fini;
- cli->uvmm.vmm.cli = cli;
+ uvmm->vmm.cli = cli;
+ cli->uvmm.ptr = uvmm;
mutex_unlock(&cli->mutex);
return 0;
out_gpuvm_fini:
drm_gpuvm_destroy(&uvmm->base);
+ kfree(uvmm);
out_unlock:
mutex_unlock(&cli->mutex);
return ret;
@@ -1866,9 +1867,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u
struct nouveau_sched_entity *entity = &cli->sched_entity;
struct drm_gpuva *va, *next;
- if (!cli)
- return;
-
rmb(); /* for list_empty to work without lock */
wait_event(entity->job.wq, list_empty(&entity->job.list.head));
@@ -1907,5 +1905,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u
mutex_lock(&cli->mutex);
nouveau_vmm_fini(&uvmm->vmm);
drm_gpuvm_destroy(&uvmm->base);
+ kfree(uvmm);
mutex_unlock(&cli->mutex);
}
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
@@ -12,8 +12,6 @@ struct nouveau_uvmm {
struct nouveau_vmm vmm;
struct maple_tree region_mt;
struct mutex mutex;
-
- bool disabled;
};
struct nouveau_uvma_region {
@@ -78,8 +76,6 @@ struct nouveau_uvmm_bind_job_args {
#define to_uvmm_bind_job(job) container_of((job), struct nouveau_uvmm_bind_job, base)
-int nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
- u64 kernel_managed_addr, u64 kernel_managed_size);
void nouveau_uvmm_fini(struct nouveau_uvmm *uvmm);
void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbov, struct nouveau_mem *mem);

View File

@ -1,221 +0,0 @@
From 8af72338dd81d1f8667e0240bd28f5fc98b3f20d Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:39 +0100
Subject: [PATCH] drm/gpuvm: reference count drm_gpuvm structures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Implement reference counting for struct drm_gpuvm.
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-10-dakr@redhat.com
---
drivers/gpu/drm/drm_gpuvm.c | 56 +++++++++++++++++++++-----
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 20 ++++++---
include/drm/drm_gpuvm.h | 31 +++++++++++++-
3 files changed, 90 insertions(+), 17 deletions(-)
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -740,6 +740,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm,
gpuvm->rb.tree = RB_ROOT_CACHED;
INIT_LIST_HEAD(&gpuvm->rb.list);
+ kref_init(&gpuvm->kref);
+
gpuvm->name = name ? name : "unknown";
gpuvm->flags = flags;
gpuvm->ops = ops;
@@ -764,15 +766,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm,
}
EXPORT_SYMBOL_GPL(drm_gpuvm_init);
-/**
- * drm_gpuvm_destroy() - cleanup a &drm_gpuvm
- * @gpuvm: pointer to the &drm_gpuvm to clean up
- *
- * Note that it is a bug to call this function on a manager that still
- * holds GPU VA mappings.
- */
-void
-drm_gpuvm_destroy(struct drm_gpuvm *gpuvm)
+static void
+drm_gpuvm_fini(struct drm_gpuvm *gpuvm)
{
gpuvm->name = NULL;
@@ -784,7 +779,35 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuv
drm_gem_object_put(gpuvm->r_obj);
}
-EXPORT_SYMBOL_GPL(drm_gpuvm_destroy);
+
+static void
+drm_gpuvm_free(struct kref *kref)
+{
+ struct drm_gpuvm *gpuvm = container_of(kref, struct drm_gpuvm, kref);
+
+ drm_gpuvm_fini(gpuvm);
+
+ if (drm_WARN_ON(gpuvm->drm, !gpuvm->ops->vm_free))
+ return;
+
+ gpuvm->ops->vm_free(gpuvm);
+}
+
+/**
+ * drm_gpuvm_put() - drop a struct drm_gpuvm reference
+ * @gpuvm: the &drm_gpuvm to release the reference of
+ *
+ * This releases a reference to @gpuvm.
+ *
+ * This function may be called from atomic context.
+ */
+void
+drm_gpuvm_put(struct drm_gpuvm *gpuvm)
+{
+ if (gpuvm)
+ kref_put(&gpuvm->kref, drm_gpuvm_free);
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_put);
static int
__drm_gpuva_insert(struct drm_gpuvm *gpuvm,
@@ -833,11 +856,21 @@ drm_gpuva_insert(struct drm_gpuvm *gpuvm
{
u64 addr = va->va.addr;
u64 range = va->va.range;
+ int ret;
if (unlikely(!drm_gpuvm_range_valid(gpuvm, addr, range)))
return -EINVAL;
- return __drm_gpuva_insert(gpuvm, va);
+ ret = __drm_gpuva_insert(gpuvm, va);
+ if (likely(!ret))
+ /* Take a reference of the GPUVM for the successfully inserted
+ * drm_gpuva. We can't take the reference in
+ * __drm_gpuva_insert() itself, since we don't want to increase
+ * the reference count for the GPUVM's kernel_alloc_node.
+ */
+ drm_gpuvm_get(gpuvm);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(drm_gpuva_insert);
@@ -870,6 +903,7 @@ drm_gpuva_remove(struct drm_gpuva *va)
}
__drm_gpuva_remove(va);
+ drm_gpuvm_put(va->vm);
}
EXPORT_SYMBOL_GPL(drm_gpuva_remove);
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1782,6 +1782,18 @@ nouveau_uvmm_bo_unmap_all(struct nouveau
}
}
+static void
+nouveau_uvmm_free(struct drm_gpuvm *gpuvm)
+{
+ struct nouveau_uvmm *uvmm = uvmm_from_gpuvm(gpuvm);
+
+ kfree(uvmm);
+}
+
+static const struct drm_gpuvm_ops gpuvm_ops = {
+ .vm_free = nouveau_uvmm_free,
+};
+
int
nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
void *data,
@@ -1832,7 +1844,7 @@ nouveau_uvmm_ioctl_vm_init(struct drm_de
NOUVEAU_VA_SPACE_END,
init->kernel_managed_addr,
init->kernel_managed_size,
- NULL);
+ &gpuvm_ops);
/* GPUVM takes care from here on. */
drm_gem_object_put(r_obj);
@@ -1851,8 +1863,7 @@ nouveau_uvmm_ioctl_vm_init(struct drm_de
return 0;
out_gpuvm_fini:
- drm_gpuvm_destroy(&uvmm->base);
- kfree(uvmm);
+ drm_gpuvm_put(&uvmm->base);
out_unlock:
mutex_unlock(&cli->mutex);
return ret;
@@ -1904,7 +1915,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u
mutex_lock(&cli->mutex);
nouveau_vmm_fini(&uvmm->vmm);
- drm_gpuvm_destroy(&uvmm->base);
- kfree(uvmm);
+ drm_gpuvm_put(&uvmm->base);
mutex_unlock(&cli->mutex);
}
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -248,6 +248,11 @@ struct drm_gpuvm {
} rb;
/**
+ * @kref: reference count of this object
+ */
+ struct kref kref;
+
+ /**
* @kernel_alloc_node:
*
* &drm_gpuva representing the address space cutout reserved for
@@ -273,7 +278,23 @@ void drm_gpuvm_init(struct drm_gpuvm *gp
u64 start_offset, u64 range,
u64 reserve_offset, u64 reserve_range,
const struct drm_gpuvm_ops *ops);
-void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm);
+
+/**
+ * drm_gpuvm_get() - acquire a struct drm_gpuvm reference
+ * @gpuvm: the &drm_gpuvm to acquire the reference of
+ *
+ * This function acquires an additional reference to @gpuvm. It is illegal to
+ * call this without already holding a reference. No locks required.
+ */
+static inline struct drm_gpuvm *
+drm_gpuvm_get(struct drm_gpuvm *gpuvm)
+{
+ kref_get(&gpuvm->kref);
+
+ return gpuvm;
+}
+
+void drm_gpuvm_put(struct drm_gpuvm *gpuvm);
bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
@@ -674,6 +695,14 @@ static inline void drm_gpuva_init_from_o
*/
struct drm_gpuvm_ops {
/**
+ * @vm_free: called when the last reference of a struct drm_gpuvm is
+ * dropped
+ *
+ * This callback is mandatory.
+ */
+ void (*vm_free)(struct drm_gpuvm *gpuvm);
+
+ /**
* @op_alloc: called when the &drm_gpuvm allocates
* a struct drm_gpuva_op
*
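
The lifetime rules this introduces, as a minimal sketch for a hypothetical
driver embedding struct drm_gpuvm (mirroring the nouveau hunks above):

struct my_vm {
	struct drm_gpuvm base;
};

static void my_vm_free(struct drm_gpuvm *gpuvm)
{
	struct my_vm *vm = container_of(gpuvm, struct my_vm, base);

	kfree(vm);
}

/* vm_free is mandatory now that the last drm_gpuvm_put() frees the VM. */
static const struct drm_gpuvm_ops my_vm_ops = {
	.vm_free = my_vm_free,
};

Teardown then becomes drm_gpuvm_put(&vm->base) rather than
drm_gpuvm_destroy() plus kfree(), and every successfully inserted
drm_gpuva holds a VM reference until it is removed.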

View File

@ -1,451 +0,0 @@
From 014f831abcb82738e57c0b00db66dfef0798ed67 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Mon, 13 Nov 2023 23:12:00 +0100
Subject: [PATCH] drm/nouveau: use GPUVM common infrastructure
GPUVM provides common infrastructure to track external and evicted GEM
objects as well as locking and validation helpers.
Especially external and evicted object tracking is a huge improvement
compared to the current brute force approach of iterating all mappings
in order to lock and validate the GPUVM's GEM objects. Hence, make use of
it.
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113221202.7203-1-dakr@redhat.com
---
drivers/gpu/drm/nouveau/nouveau_bo.c | 4 +-
drivers/gpu/drm/nouveau/nouveau_exec.c | 57 +++-------
drivers/gpu/drm/nouveau/nouveau_exec.h | 4 -
drivers/gpu/drm/nouveau/nouveau_sched.c | 9 +-
drivers/gpu/drm/nouveau/nouveau_sched.h | 7 +-
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 134 +++++++++++++-----------
6 files changed, 100 insertions(+), 115 deletions(-)
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1056,17 +1056,18 @@ nouveau_bo_move(struct ttm_buffer_object
{
struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
struct nouveau_bo *nvbo = nouveau_bo(bo);
+ struct drm_gem_object *obj = &bo->base;
struct ttm_resource *old_reg = bo->resource;
struct nouveau_drm_tile *new_tile = NULL;
int ret = 0;
-
if (new_reg->mem_type == TTM_PL_TT) {
ret = nouveau_ttm_tt_bind(bo->bdev, bo->ttm, new_reg);
if (ret)
return ret;
}
+ drm_gpuvm_bo_gem_evict(obj, evict);
nouveau_bo_move_ntfy(bo, new_reg);
ret = ttm_bo_wait_ctx(bo, ctx);
if (ret)
@@ -1131,6 +1132,7 @@ out:
out_ntfy:
if (ret) {
nouveau_bo_move_ntfy(bo, bo->resource);
+ drm_gpuvm_bo_gem_evict(obj, !evict);
}
return ret;
}
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: MIT
-#include <drm/drm_exec.h>
-
#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
@@ -86,14 +84,12 @@
*/
static int
-nouveau_exec_job_submit(struct nouveau_job *job)
+nouveau_exec_job_submit(struct nouveau_job *job,
+ struct drm_gpuvm_exec *vme)
{
struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
struct nouveau_cli *cli = job->cli;
struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
- struct drm_exec *exec = &job->exec;
- struct drm_gem_object *obj;
- unsigned long index;
int ret;
/* Create a new fence, but do not emit yet. */
@@ -102,52 +98,29 @@ nouveau_exec_job_submit(struct nouveau_j
return ret;
nouveau_uvmm_lock(uvmm);
- drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
- drm_exec_until_all_locked(exec) {
- struct drm_gpuva *va;
-
- drm_gpuvm_for_each_va(va, &uvmm->base) {
- if (unlikely(va == &uvmm->base.kernel_alloc_node))
- continue;
-
- ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
- drm_exec_retry_on_contention(exec);
- if (ret)
- goto err_uvmm_unlock;
- }
+ ret = drm_gpuvm_exec_lock(vme);
+ if (ret) {
+ nouveau_uvmm_unlock(uvmm);
+ return ret;
}
nouveau_uvmm_unlock(uvmm);
- drm_exec_for_each_locked_object(exec, index, obj) {
- struct nouveau_bo *nvbo = nouveau_gem_object(obj);
-
- ret = nouveau_bo_validate(nvbo, true, false);
- if (ret)
- goto err_exec_fini;
+ ret = drm_gpuvm_exec_validate(vme);
+ if (ret) {
+ drm_gpuvm_exec_unlock(vme);
+ return ret;
}
return 0;
-
-err_uvmm_unlock:
- nouveau_uvmm_unlock(uvmm);
-err_exec_fini:
- drm_exec_fini(exec);
- return ret;
-
}
static void
-nouveau_exec_job_armed_submit(struct nouveau_job *job)
+nouveau_exec_job_armed_submit(struct nouveau_job *job,
+ struct drm_gpuvm_exec *vme)
{
- struct drm_exec *exec = &job->exec;
- struct drm_gem_object *obj;
- unsigned long index;
-
- drm_exec_for_each_locked_object(exec, index, obj)
- dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);
-
- drm_exec_fini(exec);
+ drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
+ job->resv_usage, job->resv_usage);
+ drm_gpuvm_exec_unlock(vme);
}
static struct dma_fence *
--- a/drivers/gpu/drm/nouveau/nouveau_exec.h
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.h
@@ -3,16 +3,12 @@
#ifndef __NOUVEAU_EXEC_H__
#define __NOUVEAU_EXEC_H__
-#include <drm/drm_exec.h>
-
#include "nouveau_drv.h"
#include "nouveau_sched.h"
struct nouveau_exec_job_args {
struct drm_file *file_priv;
struct nouveau_sched_entity *sched_entity;
-
- struct drm_exec exec;
struct nouveau_channel *chan;
struct {
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -263,6 +263,11 @@ nouveau_job_submit(struct nouveau_job *j
{
struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity);
struct dma_fence *done_fence = NULL;
+ struct drm_gpuvm_exec vm_exec = {
+ .vm = &nouveau_cli_uvmm(job->cli)->base,
+ .flags = DRM_EXEC_IGNORE_DUPLICATES,
+ .num_fences = 1,
+ };
int ret;
ret = nouveau_job_add_deps(job);
@@ -282,7 +287,7 @@ nouveau_job_submit(struct nouveau_job *j
* successfully.
*/
if (job->ops->submit) {
- ret = job->ops->submit(job);
+ ret = job->ops->submit(job, &vm_exec);
if (ret)
goto err_cleanup;
}
@@ -315,7 +320,7 @@ nouveau_job_submit(struct nouveau_job *j
set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags);
if (job->ops->armed_submit)
- job->ops->armed_submit(job);
+ job->ops->armed_submit(job, &vm_exec);
nouveau_job_fence_attach(job);
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -5,7 +5,7 @@
#include <linux/types.h>
-#include <drm/drm_exec.h>
+#include <drm/drm_gpuvm.h>
#include <drm/gpu_scheduler.h>
#include "nouveau_drv.h"
@@ -54,7 +54,6 @@ struct nouveau_job {
struct drm_file *file_priv;
struct nouveau_cli *cli;
- struct drm_exec exec;
enum dma_resv_usage resv_usage;
struct dma_fence *done_fence;
@@ -76,8 +75,8 @@ struct nouveau_job {
/* If .submit() returns without any error, it is guaranteed that
* armed_submit() is called.
*/
- int (*submit)(struct nouveau_job *);
- void (*armed_submit)(struct nouveau_job *);
+ int (*submit)(struct nouveau_job *, struct drm_gpuvm_exec *);
+ void (*armed_submit)(struct nouveau_job *, struct drm_gpuvm_exec *);
struct dma_fence *(*run)(struct nouveau_job *);
void (*free)(struct nouveau_job *);
enum drm_gpu_sched_stat (*timeout)(struct nouveau_job *);
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -438,8 +438,9 @@ nouveau_uvma_region_complete(struct nouv
static void
op_map_prepare_unwind(struct nouveau_uvma *uvma)
{
+ struct drm_gpuva *va = &uvma->va;
nouveau_uvma_gem_put(uvma);
- drm_gpuva_remove(&uvma->va);
+ drm_gpuva_remove(va);
nouveau_uvma_free(uvma);
}
@@ -468,6 +469,7 @@ nouveau_uvmm_sm_prepare_unwind(struct no
break;
case DRM_GPUVA_OP_REMAP: {
struct drm_gpuva_op_remap *r = &op->remap;
+ struct drm_gpuva *va = r->unmap->va;
if (r->next)
op_map_prepare_unwind(new->next);
@@ -475,7 +477,7 @@ nouveau_uvmm_sm_prepare_unwind(struct no
if (r->prev)
op_map_prepare_unwind(new->prev);
- op_unmap_prepare_unwind(r->unmap->va);
+ op_unmap_prepare_unwind(va);
break;
}
case DRM_GPUVA_OP_UNMAP:
@@ -634,6 +636,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_u
goto unwind;
}
}
+
break;
}
case DRM_GPUVA_OP_REMAP: {
@@ -1135,12 +1138,53 @@ bind_link_gpuvas(struct bind_job_op *bop
}
static int
-nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
+bind_lock_validate(struct nouveau_job *job, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
+ struct bind_job_op *op;
+ int ret;
+
+ list_for_each_op(op, &bind_job->ops) {
+ struct drm_gpuva_op *va_op;
+
+ if (!op->ops)
+ continue;
+
+ drm_gpuva_for_each_op(va_op, op->ops) {
+ struct drm_gem_object *obj = op_gem_obj(va_op);
+
+ if (unlikely(!obj))
+ continue;
+
+ ret = drm_exec_prepare_obj(exec, obj, num_fences);
+ if (ret)
+ return ret;
+
+ /* Don't validate GEMs backing mappings we're about to
+ * unmap, it's not worth the effort.
+ */
+ if (va_op->op == DRM_GPUVA_OP_UNMAP)
+ continue;
+
+ ret = nouveau_bo_validate(nouveau_gem_object(obj),
+ true, false);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int
+nouveau_uvmm_bind_job_submit(struct nouveau_job *job,
+ struct drm_gpuvm_exec *vme)
{
struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
struct nouveau_sched_entity *entity = job->entity;
- struct drm_exec *exec = &job->exec;
+ struct drm_exec *exec = &vme->exec;
struct bind_job_op *op;
int ret;
@@ -1157,6 +1201,8 @@ nouveau_uvmm_bind_job_submit(struct nouv
dma_resv_unlock(obj->resv);
if (IS_ERR(op->vm_bo))
return PTR_ERR(op->vm_bo);
+
+ drm_gpuvm_bo_extobj_add(op->vm_bo);
}
ret = bind_validate_op(job, op);
@@ -1179,6 +1225,7 @@ nouveau_uvmm_bind_job_submit(struct nouv
* unwind all GPU VA space changes on failure.
*/
nouveau_uvmm_lock(uvmm);
+
list_for_each_op(op, &bind_job->ops) {
switch (op->op) {
case OP_MAP_SPARSE:
@@ -1290,57 +1337,13 @@ nouveau_uvmm_bind_job_submit(struct nouv
}
}
- drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
+ drm_exec_init(exec, vme->flags);
drm_exec_until_all_locked(exec) {
- list_for_each_op(op, &bind_job->ops) {
- struct drm_gpuva_op *va_op;
-
- if (IS_ERR_OR_NULL(op->ops))
- continue;
-
- drm_gpuva_for_each_op(va_op, op->ops) {
- struct drm_gem_object *obj = op_gem_obj(va_op);
-
- if (unlikely(!obj))
- continue;
-
- ret = drm_exec_prepare_obj(exec, obj, 1);
- drm_exec_retry_on_contention(exec);
- if (ret) {
- op = list_last_op(&bind_job->ops);
- goto unwind;
- }
- }
- }
- }
-
- list_for_each_op(op, &bind_job->ops) {
- struct drm_gpuva_op *va_op;
-
- if (IS_ERR_OR_NULL(op->ops))
- continue;
-
- drm_gpuva_for_each_op(va_op, op->ops) {
- struct drm_gem_object *obj = op_gem_obj(va_op);
- struct nouveau_bo *nvbo;
-
- if (unlikely(!obj))
- continue;
-
- /* Don't validate GEMs backing mappings we're about to
- * unmap, it's not worth the effort.
- */
- if (unlikely(va_op->op == DRM_GPUVA_OP_UNMAP))
- continue;
-
- nvbo = nouveau_gem_object(obj);
- nouveau_bo_placement_set(nvbo, nvbo->valid_domains, 0);
- ret = nouveau_bo_validate(nvbo, true, false);
- if (ret) {
- op = list_last_op(&bind_job->ops);
- goto unwind;
- }
+ ret = bind_lock_validate(job, exec, vme->num_fences);
+ drm_exec_retry_on_contention(exec);
+ if (ret) {
+ op = list_last_op(&bind_job->ops);
+ goto unwind;
}
}
@@ -1415,21 +1418,17 @@ unwind:
}
nouveau_uvmm_unlock(uvmm);
- drm_exec_fini(exec);
+ drm_gpuvm_exec_unlock(vme);
return ret;
}
static void
-nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job)
+nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job,
+ struct drm_gpuvm_exec *vme)
{
- struct drm_exec *exec = &job->exec;
- struct drm_gem_object *obj;
- unsigned long index;
-
- drm_exec_for_each_locked_object(exec, index, obj)
- dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);
-
- drm_exec_fini(exec);
+ drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
+ job->resv_usage, job->resv_usage);
+ drm_gpuvm_exec_unlock(vme);
}
static struct dma_fence *
@@ -1817,8 +1816,18 @@ nouveau_uvmm_free(struct drm_gpuvm *gpuv
kfree(uvmm);
}
+static int
+nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
+{
+ struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj);
+
+ nouveau_bo_placement_set(nvbo, nvbo->valid_domains, 0);
+ return nouveau_bo_validate(nvbo, true, false);
+}
+
static const struct drm_gpuvm_ops gpuvm_ops = {
.vm_free = nouveau_uvmm_free,
+ .vm_bo_validate = nouveau_uvmm_bo_validate,
};
int
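
The resulting generic submit flow, sketched outside of nouveau (done_fence
and the dma-resv usage values are placeholders; error handling is
abbreviated):

	struct drm_gpuvm_exec vm_exec = {
		.vm = gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	int ret;

	/* Lock the VM resv and all external objects in one go. */
	ret = drm_gpuvm_exec_lock(&vm_exec);
	if (ret)
		return ret;

	/* Revalidate evicted BOs through the vm_bo_validate() callback. */
	ret = drm_gpuvm_exec_validate(&vm_exec);
	if (ret)
		goto out_unlock;

	/* ... run the job, then publish its fence to every reservation. */
	drm_gpuvm_exec_resv_add_fence(&vm_exec, done_fence,
				      DMA_RESV_USAGE_WRITE,
				      DMA_RESV_USAGE_WRITE);
out_unlock:
	drm_gpuvm_exec_unlock(&vm_exec);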

View File

@ -1,60 +0,0 @@
From a191f73d85484f804284674c14f2d9f572c18adb Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:23 +0000
Subject: [PATCH] drm/gpuvm: Helper to get range of unmap from a remap op.
Determining the start and range of the unmap stage of a remap op is a
common piece of code currently implemented by multiple drivers. Add a
helper for this.
Changes since v7:
- Renamed helper to drm_gpuva_op_remap_to_unmap_range()
- Improved documentation
Changes since v6:
- Remove use of __always_inline
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Reviewed-by: Danilo Krummrich <dakr@redhat.com>
Link: https://lore.kernel.org/r/8a0a5b5eeec459d3c60fcdaa5a638ad14a18a59e.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
include/drm/drm_gpuvm.h | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -1213,4 +1213,32 @@ void drm_gpuva_remap(struct drm_gpuva *p
void drm_gpuva_unmap(struct drm_gpuva_op_unmap *op);
+/**
+ * drm_gpuva_op_remap_to_unmap_range() - Helper to get the start and range of
+ * the unmap stage of a remap op.
+ * @op: Remap op.
+ * @start_addr: Output pointer for the start of the required unmap.
+ * @range: Output pointer for the length of the required unmap.
+ *
+ * The given start address and range will be set such that they represent the
+ * range of the address space that was previously covered by the mapping being
+ * re-mapped, but is now empty.
+ */
+static inline void
+drm_gpuva_op_remap_to_unmap_range(const struct drm_gpuva_op_remap *op,
+ u64 *start_addr, u64 *range)
+{
+ const u64 va_start = op->prev ?
+ op->prev->va.addr + op->prev->va.range :
+ op->unmap->va->va.addr;
+ const u64 va_end = op->next ?
+ op->next->va.addr :
+ op->unmap->va->va.addr + op->unmap->va->va.range;
+
+ if (start_addr)
+ *start_addr = va_start;
+ if (range)
+ *range = va_end - va_start;
+}
+
#endif /* __DRM_GPUVM_H__ */
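
A usage sketch of the new helper, assuming a hypothetical driver MMU hook
my_mmu_unmap():

static void my_vm_handle_remap(struct drm_gpuva_op_remap *op)
{
	u64 start, range;

	/* The span previously covered by the old mapping, now empty. */
	drm_gpuva_op_remap_to_unmap_range(op, &start, &range);
	my_mmu_unmap(start, range);
}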

View File

@ -1,41 +0,0 @@
From b9c02e1052650af56d4487efa5fade3fb70e3653 Mon Sep 17 00:00:00 2001
From: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Date: Mon, 6 Nov 2023 12:48:27 +0100
Subject: [PATCH] drm/gpuvm: Fix deprecated license identifier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
"GPL-2.0-only" in the license header was incorrectly changed to the
now deprecated "GPL-2.0". Fix.
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Danilo Krummrich <dakr@redhat.com>
Reported-by: David Edelsohn <dje.gcc@gmail.com>
Closes: https://lore.kernel.org/dri-devel/5lfrhdpkwhpgzipgngojs3tyqfqbesifzu5nf4l5q3nhfdhcf2@25nmiq7tfrew/T/#m5c356d68815711eea30dd94cc6f7ea8cd4344fe3
Fixes: f7749a549b4f ("drm/gpuvm: Dual-licence the drm_gpuvm code GPL-2.0 OR MIT")
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231106114827.62492-1-thomas.hellstrom@linux.intel.com
---
drivers/gpu/drm/drm_gpuvm.c | 2 +-
include/drm/drm_gpuvm.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Copyright (c) 2022 Red Hat.
*
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
#ifndef __DRM_GPUVM_H__
#define __DRM_GPUVM_H__

View File

@ -1,142 +0,0 @@
From e759f2ca29d918d3db57a61cdf838025beb03465 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 29 Nov 2023 23:08:00 +0100
Subject: [PATCH] drm/gpuvm: fall back to drm_exec_lock_obj()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Fall back to drm_exec_lock_obj() if num_fences is zero for the
drm_gpuvm_prepare_* function family.
Otherwise dma_resv_reserve_fences() would actually allocate slots even
though num_fences is zero.
Cc: Christian König <christian.koenig@amd.com>
Acked-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231129220835.297885-2-dakr@redhat.com
---
drivers/gpu/drm/drm_gpuvm.c | 43 ++++++++++++++++++++++++++++++++-----
include/drm/drm_gpuvm.h | 23 +++-----------------
2 files changed, 41 insertions(+), 25 deletions(-)
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -1080,6 +1080,37 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm)
EXPORT_SYMBOL_GPL(drm_gpuvm_put);
static int
+exec_prepare_obj(struct drm_exec *exec, struct drm_gem_object *obj,
+ unsigned int num_fences)
+{
+ return num_fences ? drm_exec_prepare_obj(exec, obj, num_fences) :
+ drm_exec_lock_obj(exec, obj);
+}
+
+/**
+ * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv
+ * @gpuvm: the &drm_gpuvm
+ * @exec: the &drm_exec context
+ * @num_fences: the amount of &dma_fences to reserve
+ *
+ * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object; if
+ * @num_fences is zero drm_exec_lock_obj() is called instead.
+ *
+ * Using this function directly, it is the drivers responsibility to call
+ * drm_exec_init() and drm_exec_fini() accordingly.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int
+drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm,
+ struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ return exec_prepare_obj(exec, gpuvm->r_obj, num_fences);
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_vm);
+
+static int
__drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm,
struct drm_exec *exec,
unsigned int num_fences)
@@ -1089,7 +1120,7 @@ __drm_gpuvm_prepare_objects(struct drm_g
int ret = 0;
for_each_vm_bo_in_list(gpuvm, extobj, &extobjs, vm_bo) {
- ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences);
+ ret = exec_prepare_obj(exec, vm_bo->obj, num_fences);
if (ret)
break;
}
@@ -1110,7 +1141,7 @@ drm_gpuvm_prepare_objects_locked(struct
drm_gpuvm_resv_assert_held(gpuvm);
list_for_each_entry(vm_bo, &gpuvm->extobj.list, list.entry.extobj) {
- ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences);
+ ret = exec_prepare_obj(exec, vm_bo->obj, num_fences);
if (ret)
break;
@@ -1128,7 +1159,8 @@ drm_gpuvm_prepare_objects_locked(struct
* @num_fences: the amount of &dma_fences to reserve
*
* Calls drm_exec_prepare_obj() for all &drm_gem_objects the given
- * &drm_gpuvm contains mappings of.
+ * &drm_gpuvm contains mappings of; if @num_fences is zero drm_exec_lock_obj()
+ * is called instead.
*
* Using this function directly, it is the drivers responsibility to call
* drm_exec_init() and drm_exec_fini() accordingly.
@@ -1165,7 +1197,8 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_obje
* @num_fences: the amount of &dma_fences to reserve
*
* Calls drm_exec_prepare_obj() for all &drm_gem_objects mapped between @addr
- * and @addr + @range.
+ * and @addr + @range; if @num_fences is zero drm_exec_lock_obj() is called
+ * instead.
*
* Returns: 0 on success, negative error code on failure.
*/
@@ -1180,7 +1213,7 @@ drm_gpuvm_prepare_range(struct drm_gpuvm
drm_gpuvm_for_each_va_range(va, gpuvm, addr, end) {
struct drm_gem_object *obj = va->gem.obj;
- ret = drm_exec_prepare_obj(exec, obj, num_fences);
+ ret = exec_prepare_obj(exec, obj, num_fences);
if (ret)
return ret;
}
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -544,26 +544,9 @@ struct drm_gpuvm_exec {
} extra;
};
-/**
- * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv
- * @gpuvm: the &drm_gpuvm
- * @exec: the &drm_exec context
- * @num_fences: the amount of &dma_fences to reserve
- *
- * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object.
- *
- * Using this function directly, it is the drivers responsibility to call
- * drm_exec_init() and drm_exec_fini() accordingly.
- *
- * Returns: 0 on success, negative error code on failure.
- */
-static inline int
-drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm,
- struct drm_exec *exec,
- unsigned int num_fences)
-{
- return drm_exec_prepare_obj(exec, gpuvm->r_obj, num_fences);
-}
+int drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm,
+ struct drm_exec *exec,
+ unsigned int num_fences);
int drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm,
struct drm_exec *exec,
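
A minimal sketch of the lock-only pass this enables: with num_fences == 0
the objects are locked via drm_exec_lock_obj() and no dma-resv fence slots
are reserved.

	struct drm_exec exec;
	int ret;

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
	drm_exec_until_all_locked(&exec) {
		ret = drm_gpuvm_prepare_objects(gpuvm, &exec, 0);
		drm_exec_retry_on_contention(&exec);
		if (ret)
			break;
	}
	/* ... inspect the locked objects ... */
	drm_exec_fini(&exec);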

View File

@ -1,59 +0,0 @@
From c50a291d621aa7abaa27b05f56d450a388b64948 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Mon, 4 Dec 2023 16:14:06 +0100
Subject: [PATCH] drm/gpuvm: Let drm_gpuvm_bo_put() report when the vm_bo
object is destroyed
Some users need to release resources attached to the vm_bo object when
it's destroyed. In Panthor's case, we need to release the pin ref so
BO pages can be returned to the system when all GPU mappings are gone.
This could be done through a custom drm_gpuvm::vm_bo_free() hook, but
this has all sorts of locking implications that would force us to expose
a drm_gem_shmem_unpin_locked() helper, not to mention the fact that
having a ::vm_bo_free() implementation without a ::vm_bo_alloc() one
seems odd. So let's keep things simple, and extend drm_gpuvm_bo_put()
to report when the object is destroyed.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204151406.1977285-1-boris.brezillon@collabora.com
---
drivers/gpu/drm/drm_gpuvm.c | 8 ++++++--
include/drm/drm_gpuvm.h | 2 +-
2 files changed, 7 insertions(+), 3 deletions(-)
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -1529,14 +1529,18 @@ drm_gpuvm_bo_destroy(struct kref *kref)
* hold the dma-resv or driver specific GEM gpuva lock.
*
* This function may only be called from non-atomic context.
+ *
+ * Returns: true if vm_bo was destroyed, false otherwise.
*/
-void
+bool
drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo)
{
might_sleep();
if (vm_bo)
- kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy);
+ return !!kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy);
+
+ return false;
}
EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put);
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -721,7 +721,7 @@ drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm
return vm_bo;
}
-void drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo);
+bool drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo);
struct drm_gpuvm_bo *
drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm,

@@ -1,405 +0,0 @@
From 56e449603f0ac580700621a356d35d5716a62ce5 Mon Sep 17 00:00:00 2001
From: Luben Tuikov <luben.tuikov@amd.com>
Date: Sat, 14 Oct 2023 21:15:35 -0400
Subject: [PATCH] drm/sched: Convert the GPU scheduler to variable number of
run-queues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The GPU scheduler now has a variable number of run-queues, which are set up at
drm_sched_init() time. This way, each driver announces how many run-queues it
requires (supports) for each GPU scheduler it creates. Note that run-queues
correspond to scheduler "priorities", thus if the number of run-queues is set
to 1 at drm_sched_init(), then that scheduler supports a single run-queue,
i.e. a single "priority". If a driver further sets a single entity per
run-queue, then this creates a 1-to-1 correspondence between a scheduler and
a scheduled entity.
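For illustration, a hedged sketch of a driver call under the new signature (my_sched_ops, hw_submission_limit and gpu are placeholders, not taken from this patch); a driver that needs only a single priority now passes 1 as num_rqs:

	ret = drm_sched_init(&gpu->sched, &my_sched_ops,
			     1,			/* num_rqs: a single run-queue/priority */
			     hw_submission_limit, 0,
			     msecs_to_jiffies(500), NULL,
			     NULL, "my_sched", gpu->dev);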
Cc: Lucas Stach <l.stach@pengutronix.de>
Cc: Russell King <linux+etnaviv@armlinux.org.uk>
Cc: Qiang Yu <yuq825@gmail.com>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Abhinav Kumar <quic_abhinavk@quicinc.com>
Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Cc: Danilo Krummrich <dakr@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Boris Brezillon <boris.brezillon@collabora.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Emma Anholt <emma@anholt.net>
Cc: etnaviv@lists.freedesktop.org
Cc: lima@lists.freedesktop.org
Cc: linux-arm-msm@vger.kernel.org
Cc: freedreno@lists.freedesktop.org
Cc: nouveau@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/20231023032251.164775-1-luben.tuikov@amd.com
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 +-
drivers/gpu/drm/etnaviv/etnaviv_sched.c | 1 +
drivers/gpu/drm/lima/lima_sched.c | 4 +-
drivers/gpu/drm/msm/msm_ringbuffer.c | 5 +-
drivers/gpu/drm/nouveau/nouveau_sched.c | 1 +
drivers/gpu/drm/panfrost/panfrost_job.c | 1 +
drivers/gpu/drm/scheduler/sched_entity.c | 18 +++++-
drivers/gpu/drm/scheduler/sched_main.c | 74 ++++++++++++++++++----
drivers/gpu/drm/v3d/v3d_sched.c | 5 ++
include/drm/gpu_scheduler.h | 9 ++-
11 files changed, 98 insertions(+), 25 deletions(-)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2277,6 +2277,7 @@ static int amdgpu_device_init_schedulers
}
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+ DRM_SCHED_PRIORITY_COUNT,
ring->num_hw_submission, 0,
timeout, adev->reset_domain->wq,
ring->sched_score, ring->name,
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -326,8 +326,8 @@ void amdgpu_job_stop_all_jobs_on_sched(s
int i;
/* Signal all jobs not yet scheduled */
- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
- struct drm_sched_rq *rq = &sched->sched_rq[i];
+ for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+ struct drm_sched_rq *rq = sched->sched_rq[i];
spin_lock(&rq->lock);
list_for_each_entry(s_entity, &rq->entities, list) {
while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -134,6 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gp
int ret;
ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
+ DRM_SCHED_PRIORITY_COUNT,
etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
msecs_to_jiffies(500), NULL, NULL,
dev_name(gpu->dev), gpu->dev);
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -495,7 +495,9 @@ int lima_sched_pipe_init(struct lima_sch
INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
- return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
+ return drm_sched_init(&pipe->base, &lima_sched_ops,
+ DRM_SCHED_PRIORITY_COUNT,
+ 1,
lima_job_hang_limit,
msecs_to_jiffies(timeout), NULL,
NULL, name, pipe->ldev->dev);
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -98,8 +98,9 @@ struct msm_ringbuffer *msm_ringbuffer_ne
sched_timeout = MAX_SCHEDULE_TIMEOUT;
ret = drm_sched_init(&ring->sched, &msm_sched_ops,
- num_hw_submissions, 0, sched_timeout,
- NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
+ DRM_SCHED_PRIORITY_COUNT,
+ num_hw_submissions, 0, sched_timeout,
+ NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
if (ret) {
goto fail;
}
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -441,6 +441,7 @@ int nouveau_sched_init(struct nouveau_dr
return -ENOMEM;
return drm_sched_init(sched, &nouveau_sched_ops,
+ DRM_SCHED_PRIORITY_COUNT,
NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
NULL, NULL, "nouveau_sched", drm->dev->dev);
}
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -832,6 +832,7 @@ int panfrost_job_init(struct panfrost_de
ret = drm_sched_init(&js->queue[j].sched,
&panfrost_sched_ops,
+ DRM_SCHED_PRIORITY_COUNT,
nentries, 0,
msecs_to_jiffies(JOB_TIMEOUT_MS),
pfdev->reset.wq,
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -75,8 +75,20 @@ int drm_sched_entity_init(struct drm_sch
RCU_INIT_POINTER(entity->last_scheduled, NULL);
RB_CLEAR_NODE(&entity->rb_tree_node);
- if(num_sched_list)
- entity->rq = &sched_list[0]->sched_rq[entity->priority];
+ if (!sched_list[0]->sched_rq) {
+ /* Warn drivers not to do this and to fix their DRM
+ * calling order.
+ */
+ pr_warn("%s: called with uninitialized scheduler\n", __func__);
+ } else if (num_sched_list) {
+ /* The "priority" of an entity cannot exceed the number
+ * of run-queues of a scheduler.
+ */
+ if (entity->priority >= sched_list[0]->num_rqs)
+ entity->priority = max_t(u32, sched_list[0]->num_rqs,
+ DRM_SCHED_PRIORITY_MIN);
+ entity->rq = sched_list[0]->sched_rq[entity->priority];
+ }
init_completion(&entity->entity_idle);
@@ -535,7 +547,7 @@ void drm_sched_entity_select_rq(struct d
spin_lock(&entity->rq_lock);
sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list);
- rq = sched ? &sched->sched_rq[entity->priority] : NULL;
+ rq = sched ? sched->sched_rq[entity->priority] : NULL;
if (rq != entity->rq) {
drm_sched_rq_remove_entity(entity->rq, entity);
entity->rq = rq;
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -632,8 +632,14 @@ int drm_sched_job_init(struct drm_sched_
struct drm_sched_entity *entity,
void *owner)
{
- if (!entity->rq)
+ if (!entity->rq) {
+ /* This will most likely be followed by missing frames
+ * or worse--a blank screen--leave a trail in the
+ * logs, so this can be debugged easier.
+ */
+ drm_err(job->sched, "%s: entity has no rq!\n", __func__);
return -ENOENT;
+ }
/*
* We don't know for sure how the user has allocated. Thus, zero the
@@ -679,7 +685,7 @@ void drm_sched_job_arm(struct drm_sched_
sched = entity->rq->sched;
job->sched = sched;
- job->s_priority = entity->rq - sched->sched_rq;
+ job->s_priority = entity->priority;
job->id = atomic64_inc_return(&sched->job_id_count);
drm_sched_fence_init(job->s_fence, job->entity);
@@ -896,10 +902,10 @@ drm_sched_select_entity(struct drm_gpu_s
return NULL;
/* Kernel run queue has higher priority than normal run queue*/
- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+ for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
- drm_sched_rq_select_entity_fifo(&sched->sched_rq[i]) :
- drm_sched_rq_select_entity_rr(&sched->sched_rq[i]);
+ drm_sched_rq_select_entity_fifo(sched->sched_rq[i]) :
+ drm_sched_rq_select_entity_rr(sched->sched_rq[i]);
if (entity)
break;
}
@@ -1079,6 +1085,7 @@ static int drm_sched_main(void *param)
*
* @sched: scheduler instance
* @ops: backend operations for this scheduler
+ * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT
* @hw_submission: number of hw submissions that can be in flight
* @hang_limit: number of times to allow a job to hang before dropping it
* @timeout: timeout value in jiffies for the scheduler
@@ -1092,11 +1099,12 @@ static int drm_sched_main(void *param)
*/
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
- unsigned hw_submission, unsigned hang_limit,
+ u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
long timeout, struct workqueue_struct *timeout_wq,
atomic_t *score, const char *name, struct device *dev)
{
int i, ret;
+
sched->ops = ops;
sched->hw_submission_limit = hw_submission;
sched->name = name;
@@ -1105,8 +1113,36 @@ int drm_sched_init(struct drm_gpu_schedu
sched->hang_limit = hang_limit;
sched->score = score ? score : &sched->_score;
sched->dev = dev;
- for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
- drm_sched_rq_init(sched, &sched->sched_rq[i]);
+
+ if (num_rqs > DRM_SCHED_PRIORITY_COUNT) {
+ /* This is a gross violation--tell drivers what the problem is.
+ */
+ drm_err(sched, "%s: num_rqs cannot be greater than DRM_SCHED_PRIORITY_COUNT\n",
+ __func__);
+ return -EINVAL;
+ } else if (sched->sched_rq) {
+ /* Not an error, but warn anyway so drivers can
+ * fine-tune their DRM calling order, and return all
+ * is good.
+ */
+ drm_warn(sched, "%s: scheduler already initialized!\n", __func__);
+ return 0;
+ }
+
+ sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!sched->sched_rq) {
+ drm_err(sched, "%s: out of memory for sched_rq\n", __func__);
+ return -ENOMEM;
+ }
+ sched->num_rqs = num_rqs;
+ ret = -ENOMEM;
+ for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) {
+ sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL);
+ if (!sched->sched_rq[i])
+ goto Out_unroll;
+ drm_sched_rq_init(sched, sched->sched_rq[i]);
+ }
init_waitqueue_head(&sched->wake_up_worker);
init_waitqueue_head(&sched->job_scheduled);
@@ -1123,11 +1159,18 @@ int drm_sched_init(struct drm_gpu_schedu
ret = PTR_ERR(sched->thread);
sched->thread = NULL;
DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name);
- return ret;
+ goto Out_unroll;
}
sched->ready = true;
return 0;
+Out_unroll:
+ for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--)
+ kfree(sched->sched_rq[i]);
+ kfree(sched->sched_rq);
+ sched->sched_rq = NULL;
+ drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__);
+ return ret;
}
EXPORT_SYMBOL(drm_sched_init);
@@ -1146,8 +1189,8 @@ void drm_sched_fini(struct drm_gpu_sched
if (sched->thread)
kthread_stop(sched->thread);
- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
- struct drm_sched_rq *rq = &sched->sched_rq[i];
+ for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+ struct drm_sched_rq *rq = sched->sched_rq[i];
spin_lock(&rq->lock);
list_for_each_entry(s_entity, &rq->entities, list)
@@ -1158,7 +1201,7 @@ void drm_sched_fini(struct drm_gpu_sched
*/
s_entity->stopped = true;
spin_unlock(&rq->lock);
-
+ kfree(sched->sched_rq[i]);
}
/* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */
@@ -1168,6 +1211,8 @@ void drm_sched_fini(struct drm_gpu_sched
cancel_delayed_work_sync(&sched->work_tdr);
sched->ready = false;
+ kfree(sched->sched_rq);
+ sched->sched_rq = NULL;
}
EXPORT_SYMBOL(drm_sched_fini);
@@ -1194,9 +1239,10 @@ void drm_sched_increase_karma(struct drm
if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
atomic_inc(&bad->karma);
- for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
+ for (i = DRM_SCHED_PRIORITY_MIN;
+ i < min_t(typeof(sched->num_rqs), sched->num_rqs, DRM_SCHED_PRIORITY_KERNEL);
i++) {
- struct drm_sched_rq *rq = &sched->sched_rq[i];
+ struct drm_sched_rq *rq = sched->sched_rq[i];
spin_lock(&rq->lock);
list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -389,6 +389,7 @@ v3d_sched_init(struct v3d_dev *v3d)
ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
&v3d_bin_sched_ops,
+ DRM_SCHED_PRIORITY_COUNT,
hw_jobs_limit, job_hang_limit,
msecs_to_jiffies(hang_limit_ms), NULL,
NULL, "v3d_bin", v3d->drm.dev);
@@ -397,6 +398,7 @@ v3d_sched_init(struct v3d_dev *v3d)
ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
&v3d_render_sched_ops,
+ DRM_SCHED_PRIORITY_COUNT,
hw_jobs_limit, job_hang_limit,
msecs_to_jiffies(hang_limit_ms), NULL,
NULL, "v3d_render", v3d->drm.dev);
@@ -405,6 +407,7 @@ v3d_sched_init(struct v3d_dev *v3d)
ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
&v3d_tfu_sched_ops,
+ DRM_SCHED_PRIORITY_COUNT,
hw_jobs_limit, job_hang_limit,
msecs_to_jiffies(hang_limit_ms), NULL,
NULL, "v3d_tfu", v3d->drm.dev);
@@ -414,6 +417,7 @@ v3d_sched_init(struct v3d_dev *v3d)
if (v3d_has_csd(v3d)) {
ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
&v3d_csd_sched_ops,
+ DRM_SCHED_PRIORITY_COUNT,
hw_jobs_limit, job_hang_limit,
msecs_to_jiffies(hang_limit_ms), NULL,
NULL, "v3d_csd", v3d->drm.dev);
@@ -422,6 +426,7 @@ v3d_sched_init(struct v3d_dev *v3d)
ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
&v3d_cache_clean_sched_ops,
+ DRM_SCHED_PRIORITY_COUNT,
hw_jobs_limit, job_hang_limit,
msecs_to_jiffies(hang_limit_ms), NULL,
NULL, "v3d_cache_clean", v3d->drm.dev);
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -471,7 +471,9 @@ struct drm_sched_backend_ops {
* @hw_submission_limit: the max size of the hardware queue.
* @timeout: the time after which a job is removed from the scheduler.
* @name: name of the ring for which this scheduler is being used.
- * @sched_rq: priority wise array of run queues.
+ * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT,
+ * as there's usually one run-queue per priority, but could be less.
+ * @sched_rq: An allocated array of run-queues of size @num_rqs;
* @wake_up_worker: the wait queue on which the scheduler sleeps until a job
* is ready to be scheduled.
* @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
@@ -500,7 +502,8 @@ struct drm_gpu_scheduler {
uint32_t hw_submission_limit;
long timeout;
const char *name;
- struct drm_sched_rq sched_rq[DRM_SCHED_PRIORITY_COUNT];
+ u32 num_rqs;
+ struct drm_sched_rq **sched_rq;
wait_queue_head_t wake_up_worker;
wait_queue_head_t job_scheduled;
atomic_t hw_rq_count;
@@ -520,7 +523,7 @@ struct drm_gpu_scheduler {
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
- uint32_t hw_submission, unsigned hang_limit,
+ u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
long timeout, struct workqueue_struct *timeout_wq,
atomic_t *score, const char *name, struct device *dev);

@@ -1,241 +0,0 @@
From 35963cf2cd25eeea8bdb4d02853dac1e66fb13a0 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 30 Oct 2023 20:24:35 -0700
Subject: [PATCH] drm/sched: Add drm_sched_wqueue_* helpers
Add scheduler wqueue ready, stop, and start helpers to hide the
implementation details of the scheduler from the drivers.
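A hedged usage sketch ("ring" is a placeholder): with these helpers, a driver pausing and resuming submission around a reset no longer touches the scheduler's internals:

	if (drm_sched_wqueue_ready(&ring->sched)) {
		drm_sched_wqueue_stop(&ring->sched);
		/* ... reset the hardware ... */
		drm_sched_wqueue_start(&ring->sched);
	}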
v2:
- s/sched_wqueue/sched_wqueue (Luben)
- Remove the extra white line after the return-statement (Luben)
- update drm_sched_wqueue_ready comment (Luben)
Cc: Luben Tuikov <luben.tuikov@amd.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Link: https://lore.kernel.org/r/20231031032439.1558703-2-matthew.brost@intel.com
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
.../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 15 +++----
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++---
drivers/gpu/drm/msm/adreno/adreno_device.c | 6 ++-
drivers/gpu/drm/scheduler/sched_main.c | 39 ++++++++++++++++++-
include/drm/gpu_scheduler.h | 3 ++
6 files changed, 59 insertions(+), 18 deletions(-)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -290,7 +290,7 @@ static int suspend_resume_compute_schedu
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
- if (!(ring && ring->sched.thread))
+ if (!(ring && drm_sched_wqueue_ready(&ring->sched)))
continue;
/* stop scheduler and drain ring. */
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1671,9 +1671,9 @@ static int amdgpu_debugfs_test_ib_show(s
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
- kthread_park(ring->sched.thread);
+ drm_sched_wqueue_stop(&ring->sched);
}
seq_puts(m, "run ib test:\n");
@@ -1687,9 +1687,9 @@ static int amdgpu_debugfs_test_ib_show(s
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
- kthread_unpark(ring->sched.thread);
+ drm_sched_wqueue_start(&ring->sched);
}
up_write(&adev->reset_domain->sem);
@@ -1909,7 +1909,8 @@ static int amdgpu_debugfs_ib_preempt(voi
ring = adev->rings[val];
- if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
+ if (!ring || !ring->funcs->preempt_ib ||
+ !drm_sched_wqueue_ready(&ring->sched))
return -EINVAL;
/* the last preemption failed */
@@ -1927,7 +1928,7 @@ static int amdgpu_debugfs_ib_preempt(voi
goto pro_end;
/* stop the scheduler */
- kthread_park(ring->sched.thread);
+ drm_sched_wqueue_stop(&ring->sched);
/* preempt the IB */
r = amdgpu_ring_preempt_ib(ring);
@@ -1961,7 +1962,7 @@ static int amdgpu_debugfs_ib_preempt(voi
failure:
/* restart the scheduler */
- kthread_unpark(ring->sched.thread);
+ drm_sched_wqueue_start(&ring->sched);
up_read(&adev->reset_domain->sem);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4615,7 +4615,7 @@ bool amdgpu_device_has_job_running(struc
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
spin_lock(&ring->sched.job_list_lock);
@@ -4757,7 +4757,7 @@ int amdgpu_device_pre_asic_reset(struct
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
/* Clear job fence from fence drv to avoid force_completion
@@ -5297,7 +5297,7 @@ int amdgpu_device_gpu_recover(struct amd
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
@@ -5372,7 +5372,7 @@ skip_hw_reset:
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
drm_sched_start(&ring->sched, true);
@@ -5698,7 +5698,7 @@ pci_ers_result_t amdgpu_pci_error_detect
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
drm_sched_stop(&ring->sched, NULL);
@@ -5826,7 +5826,7 @@ void amdgpu_pci_resume(struct pci_dev *p
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
drm_sched_start(&ring->sched, true);
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -810,7 +810,8 @@ static void suspend_scheduler(struct msm
*/
for (i = 0; i < gpu->nr_rings; i++) {
struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched;
- kthread_park(sched->thread);
+
+ drm_sched_wqueue_stop(sched);
}
}
@@ -820,7 +821,8 @@ static void resume_scheduler(struct msm_
for (i = 0; i < gpu->nr_rings; i++) {
struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched;
- kthread_unpark(sched->thread);
+
+ drm_sched_wqueue_start(sched);
}
}
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -439,7 +439,7 @@ void drm_sched_stop(struct drm_gpu_sched
{
struct drm_sched_job *s_job, *tmp;
- kthread_park(sched->thread);
+ drm_sched_wqueue_stop(sched);
/*
* Reinsert back the bad job here - now it's safe as
@@ -552,7 +552,7 @@ void drm_sched_start(struct drm_gpu_sche
spin_unlock(&sched->job_list_lock);
}
- kthread_unpark(sched->thread);
+ drm_sched_wqueue_start(sched);
}
EXPORT_SYMBOL(drm_sched_start);
@@ -1260,3 +1260,38 @@ void drm_sched_increase_karma(struct drm
}
}
EXPORT_SYMBOL(drm_sched_increase_karma);
+
+/**
+ * drm_sched_wqueue_ready - Is the scheduler ready for submission
+ *
+ * @sched: scheduler instance
+ *
+ * Returns true if submission is ready
+ */
+bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched)
+{
+ return !!sched->thread;
+}
+EXPORT_SYMBOL(drm_sched_wqueue_ready);
+
+/**
+ * drm_sched_wqueue_stop - stop scheduler submission
+ *
+ * @sched: scheduler instance
+ */
+void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched)
+{
+ kthread_park(sched->thread);
+}
+EXPORT_SYMBOL(drm_sched_wqueue_stop);
+
+/**
+ * drm_sched_wqueue_start - start scheduler submission
+ *
+ * @sched: scheduler instance
+ */
+void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
+{
+ kthread_unpark(sched->thread);
+}
+EXPORT_SYMBOL(drm_sched_wqueue_start);
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -552,6 +552,9 @@ void drm_sched_entity_modify_sched(struc
void drm_sched_job_cleanup(struct drm_sched_job *job);
void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched);
+bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched);
+void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched);
+void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched);
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery);
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);

@@ -1,507 +0,0 @@
From a6149f0393699308fb00149be913044977bceb56 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 30 Oct 2023 20:24:36 -0700
Subject: [PATCH] drm/sched: Convert drm scheduler to use a work queue rather
than kthread
In Xe, the new Intel GPU driver, a choice has been made to have a 1 to 1
mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
seems a bit odd but let us explain the reasoning below.
1. In Xe the submission order from multiple drm_sched_entity is not
guaranteed to match the completion order even if targeting the same hardware
engine. This is because in Xe we have a firmware scheduler, the GuC,
which is allowed to reorder, timeslice, and preempt submissions. If a
shared drm_gpu_scheduler is used across multiple drm_sched_entity, the TDR
falls apart as the TDR expects submission order == completion order. Using a
dedicated drm_gpu_scheduler per drm_sched_entity solves this problem.
2. In Xe submissions are done via programming a ring buffer (circular
buffer), and a drm_gpu_scheduler provides a limit on the number of jobs; if
the job limit is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
control on the ring for free.
A problem with this design is that a drm_gpu_scheduler currently uses a
kthread for submission / job cleanup. This doesn't scale if a large
number of drm_gpu_schedulers are used. To work around the scaling issue,
use a worker rather than a kthread for submission / job cleanup.
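As a rough sketch of what the new interface allows (my_sched_ops, hw_submission_limit, timeout and dev are placeholders), a driver can now hand many schedulers one shared, driver-owned workqueue, or pass NULL to keep a per-scheduler ordered workqueue:

	struct workqueue_struct *submit_wq;

	/* One shared submission workqueue for all schedulers of this device. */
	submit_wq = alloc_workqueue("drv-submit", 0, 0);
	if (!submit_wq)
		return -ENOMEM;

	ret = drm_sched_init(&sched, &my_sched_ops, submit_wq,
			     DRM_SCHED_PRIORITY_COUNT,
			     hw_submission_limit, 0, timeout,
			     NULL, NULL, "my_sched", dev);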
v2:
- (Rob Clark) Fix msm build
- Pass in run work queue
v3:
- (Boris) don't have loop in worker
v4:
- (Tvrtko) break out submit ready, stop, start helpers into own patch
v5:
- (Boris) default to ordered work queue
v6:
- (Luben / checkpatch) fix alignment in msm_ringbuffer.c
- (Luben) s/drm_sched_submit_queue/drm_sched_wqueue_enqueue
- (Luben) Update comment for drm_sched_wqueue_enqueue
- (Luben) Positive check for submit_wq in drm_sched_init
- (Luben) s/alloc_submit_wq/own_submit_wq
v7:
- (Luben) s/drm_sched_wqueue_enqueue/drm_sched_run_job_queue
v8:
- (Luben) Adjust var names / comments
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Link: https://lore.kernel.org/r/20231031032439.1558703-3-matthew.brost@intel.com
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 +-
drivers/gpu/drm/lima/lima_sched.c | 2 +-
drivers/gpu/drm/msm/msm_ringbuffer.c | 2 +-
drivers/gpu/drm/nouveau/nouveau_sched.c | 2 +-
drivers/gpu/drm/panfrost/panfrost_job.c | 2 +-
drivers/gpu/drm/scheduler/sched_main.c | 131 +++++++++++----------
drivers/gpu/drm/v3d/v3d_sched.c | 10 +-
include/drm/gpu_scheduler.h | 14 ++-
9 files changed, 86 insertions(+), 81 deletions(-)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2276,7 +2276,7 @@ static int amdgpu_device_init_schedulers
break;
}
- r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+ r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
ring->num_hw_submission, 0,
timeout, adev->reset_domain->wq,
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -133,7 +133,7 @@ int etnaviv_sched_init(struct etnaviv_gp
{
int ret;
- ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
+ ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
msecs_to_jiffies(500), NULL, NULL,
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -495,7 +495,7 @@ int lima_sched_pipe_init(struct lima_sch
INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
- return drm_sched_init(&pipe->base, &lima_sched_ops,
+ return drm_sched_init(&pipe->base, &lima_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
1,
lima_job_hang_limit,
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -97,7 +97,7 @@ struct msm_ringbuffer *msm_ringbuffer_ne
/* currently managing hangcheck ourselves: */
sched_timeout = MAX_SCHEDULE_TIMEOUT;
- ret = drm_sched_init(&ring->sched, &msm_sched_ops,
+ ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
num_hw_submissions, 0, sched_timeout,
NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -440,7 +440,7 @@ int nouveau_sched_init(struct nouveau_dr
if (!drm->sched_wq)
return -ENOMEM;
- return drm_sched_init(sched, &nouveau_sched_ops,
+ return drm_sched_init(sched, &nouveau_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
NULL, NULL, "nouveau_sched", drm->dev->dev);
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_de
js->queue[j].fence_context = dma_fence_context_alloc(1);
ret = drm_sched_init(&js->queue[j].sched,
- &panfrost_sched_ops,
+ &panfrost_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
nentries, 0,
msecs_to_jiffies(JOB_TIMEOUT_MS),
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -48,7 +48,6 @@
* through the jobs entity pointer.
*/
-#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
@@ -257,6 +256,16 @@ drm_sched_rq_select_entity_fifo(struct d
}
/**
+ * drm_sched_run_job_queue - enqueue run-job work
+ * @sched: scheduler instance
+ */
+static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
+{
+ if (!READ_ONCE(sched->pause_submit))
+ queue_work(sched->submit_wq, &sched->work_run_job);
+}
+
+/**
* drm_sched_job_done - complete a job
* @s_job: pointer to the job which is done
*
@@ -275,7 +284,7 @@ static void drm_sched_job_done(struct dr
dma_fence_get(&s_fence->finished);
drm_sched_fence_finished(s_fence, result);
dma_fence_put(&s_fence->finished);
- wake_up_interruptible(&sched->wake_up_worker);
+ drm_sched_run_job_queue(sched);
}
/**
@@ -882,7 +891,7 @@ static bool drm_sched_can_queue(struct d
void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
{
if (drm_sched_can_queue(sched))
- wake_up_interruptible(&sched->wake_up_worker);
+ drm_sched_run_job_queue(sched);
}
/**
@@ -993,60 +1002,41 @@ drm_sched_pick_best(struct drm_gpu_sched
EXPORT_SYMBOL(drm_sched_pick_best);
/**
- * drm_sched_blocked - check if the scheduler is blocked
+ * drm_sched_run_job_work - main scheduler thread
*
- * @sched: scheduler instance
- *
- * Returns true if blocked, otherwise false.
+ * @w: run job work
*/
-static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
+static void drm_sched_run_job_work(struct work_struct *w)
{
- if (kthread_should_park()) {
- kthread_parkme();
- return true;
- }
-
- return false;
-}
-
-/**
- * drm_sched_main - main scheduler thread
- *
- * @param: scheduler instance
- *
- * Returns 0.
- */
-static int drm_sched_main(void *param)
-{
- struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
+ struct drm_gpu_scheduler *sched =
+ container_of(w, struct drm_gpu_scheduler, work_run_job);
+ struct drm_sched_entity *entity;
+ struct drm_sched_job *cleanup_job;
int r;
- sched_set_fifo_low(current);
+ if (READ_ONCE(sched->pause_submit))
+ return;
- while (!kthread_should_stop()) {
- struct drm_sched_entity *entity = NULL;
- struct drm_sched_fence *s_fence;
- struct drm_sched_job *sched_job;
- struct dma_fence *fence;
- struct drm_sched_job *cleanup_job = NULL;
+ cleanup_job = drm_sched_get_cleanup_job(sched);
+ entity = drm_sched_select_entity(sched);
- wait_event_interruptible(sched->wake_up_worker,
- (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
- (!drm_sched_blocked(sched) &&
- (entity = drm_sched_select_entity(sched))) ||
- kthread_should_stop());
+ if (!entity && !cleanup_job)
+ return; /* No more work */
- if (cleanup_job)
- sched->ops->free_job(cleanup_job);
+ if (cleanup_job)
+ sched->ops->free_job(cleanup_job);
- if (!entity)
- continue;
+ if (entity) {
+ struct dma_fence *fence;
+ struct drm_sched_fence *s_fence;
+ struct drm_sched_job *sched_job;
sched_job = drm_sched_entity_pop_job(entity);
-
if (!sched_job) {
complete_all(&entity->entity_idle);
- continue;
+ if (!cleanup_job)
+ return; /* No more work */
+ goto again;
}
s_fence = sched_job->s_fence;
@@ -1077,7 +1067,9 @@ static int drm_sched_main(void *param)
wake_up(&sched->job_scheduled);
}
- return 0;
+
+again:
+ drm_sched_run_job_queue(sched);
}
/**
@@ -1085,6 +1077,8 @@ static int drm_sched_main(void *param)
*
* @sched: scheduler instance
* @ops: backend operations for this scheduler
+ * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is
+ * allocated and used
* @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT
* @hw_submission: number of hw submissions that can be in flight
* @hang_limit: number of times to allow a job to hang before dropping it
@@ -1099,6 +1093,7 @@ static int drm_sched_main(void *param)
*/
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
+ struct workqueue_struct *submit_wq,
u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
long timeout, struct workqueue_struct *timeout_wq,
atomic_t *score, const char *name, struct device *dev)
@@ -1129,14 +1124,22 @@ int drm_sched_init(struct drm_gpu_schedu
return 0;
}
+ if (submit_wq) {
+ sched->submit_wq = submit_wq;
+ sched->own_submit_wq = false;
+ } else {
+ sched->submit_wq = alloc_ordered_workqueue(name, 0);
+ if (!sched->submit_wq)
+ return -ENOMEM;
+
+ sched->own_submit_wq = true;
+ }
+ ret = -ENOMEM;
sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq),
GFP_KERNEL | __GFP_ZERO);
- if (!sched->sched_rq) {
- drm_err(sched, "%s: out of memory for sched_rq\n", __func__);
- return -ENOMEM;
- }
+ if (!sched->sched_rq)
+ goto Out_free;
sched->num_rqs = num_rqs;
- ret = -ENOMEM;
for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) {
sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL);
if (!sched->sched_rq[i])
@@ -1144,31 +1147,26 @@ int drm_sched_init(struct drm_gpu_schedu
drm_sched_rq_init(sched, sched->sched_rq[i]);
}
- init_waitqueue_head(&sched->wake_up_worker);
init_waitqueue_head(&sched->job_scheduled);
INIT_LIST_HEAD(&sched->pending_list);
spin_lock_init(&sched->job_list_lock);
atomic_set(&sched->hw_rq_count, 0);
INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
+ INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
atomic_set(&sched->_score, 0);
atomic64_set(&sched->job_id_count, 0);
-
- /* Each scheduler will run on a seperate kernel thread */
- sched->thread = kthread_run(drm_sched_main, sched, sched->name);
- if (IS_ERR(sched->thread)) {
- ret = PTR_ERR(sched->thread);
- sched->thread = NULL;
- DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name);
- goto Out_unroll;
- }
+ sched->pause_submit = false;
sched->ready = true;
return 0;
Out_unroll:
for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--)
kfree(sched->sched_rq[i]);
+Out_free:
kfree(sched->sched_rq);
sched->sched_rq = NULL;
+ if (sched->own_submit_wq)
+ destroy_workqueue(sched->submit_wq);
drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__);
return ret;
}
@@ -1186,8 +1184,7 @@ void drm_sched_fini(struct drm_gpu_sched
struct drm_sched_entity *s_entity;
int i;
- if (sched->thread)
- kthread_stop(sched->thread);
+ drm_sched_wqueue_stop(sched);
for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
struct drm_sched_rq *rq = sched->sched_rq[i];
@@ -1210,6 +1207,8 @@ void drm_sched_fini(struct drm_gpu_sched
/* Confirm no work left behind accessing device structures */
cancel_delayed_work_sync(&sched->work_tdr);
+ if (sched->own_submit_wq)
+ destroy_workqueue(sched->submit_wq);
sched->ready = false;
kfree(sched->sched_rq);
sched->sched_rq = NULL;
@@ -1270,7 +1269,7 @@ EXPORT_SYMBOL(drm_sched_increase_karma);
*/
bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched)
{
- return !!sched->thread;
+ return sched->ready;
}
EXPORT_SYMBOL(drm_sched_wqueue_ready);
@@ -1281,7 +1280,8 @@ EXPORT_SYMBOL(drm_sched_wqueue_ready);
*/
void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched)
{
- kthread_park(sched->thread);
+ WRITE_ONCE(sched->pause_submit, true);
+ cancel_work_sync(&sched->work_run_job);
}
EXPORT_SYMBOL(drm_sched_wqueue_stop);
@@ -1292,6 +1292,7 @@ EXPORT_SYMBOL(drm_sched_wqueue_stop);
*/
void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
{
- kthread_unpark(sched->thread);
+ WRITE_ONCE(sched->pause_submit, false);
+ queue_work(sched->submit_wq, &sched->work_run_job);
}
EXPORT_SYMBOL(drm_sched_wqueue_start);
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -388,7 +388,7 @@ v3d_sched_init(struct v3d_dev *v3d)
int ret;
ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
- &v3d_bin_sched_ops,
+ &v3d_bin_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
hw_jobs_limit, job_hang_limit,
msecs_to_jiffies(hang_limit_ms), NULL,
@@ -397,7 +397,7 @@ v3d_sched_init(struct v3d_dev *v3d)
return ret;
ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
- &v3d_render_sched_ops,
+ &v3d_render_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
hw_jobs_limit, job_hang_limit,
msecs_to_jiffies(hang_limit_ms), NULL,
@@ -406,7 +406,7 @@ v3d_sched_init(struct v3d_dev *v3d)
goto fail;
ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
- &v3d_tfu_sched_ops,
+ &v3d_tfu_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
hw_jobs_limit, job_hang_limit,
msecs_to_jiffies(hang_limit_ms), NULL,
@@ -416,7 +416,7 @@ v3d_sched_init(struct v3d_dev *v3d)
if (v3d_has_csd(v3d)) {
ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
- &v3d_csd_sched_ops,
+ &v3d_csd_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
hw_jobs_limit, job_hang_limit,
msecs_to_jiffies(hang_limit_ms), NULL,
@@ -425,7 +425,7 @@ v3d_sched_init(struct v3d_dev *v3d)
goto fail;
ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
- &v3d_cache_clean_sched_ops,
+ &v3d_cache_clean_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
hw_jobs_limit, job_hang_limit,
msecs_to_jiffies(hang_limit_ms), NULL,
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -474,17 +474,16 @@ struct drm_sched_backend_ops {
* @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT,
* as there's usually one run-queue per priority, but could be less.
* @sched_rq: An allocated array of run-queues of size @num_rqs;
- * @wake_up_worker: the wait queue on which the scheduler sleeps until a job
- * is ready to be scheduled.
* @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
* waits on this wait queue until all the scheduled jobs are
* finished.
* @hw_rq_count: the number of jobs currently in the hardware queue.
* @job_id_count: used to assign unique id to the each job.
+ * @submit_wq: workqueue used to queue @work_run_job
* @timeout_wq: workqueue used to queue @work_tdr
+ * @work_run_job: work which calls run_job op of each scheduler.
* @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
* timeout interval is over.
- * @thread: the kthread on which the scheduler which run.
* @pending_list: the list of jobs which are currently in the job queue.
* @job_list_lock: lock to protect the pending_list.
* @hang_limit: once the hangs by a job crosses this limit then it is marked
@@ -493,6 +492,8 @@ struct drm_sched_backend_ops {
* @_score: score used when the driver doesn't provide one
* @ready: marks if the underlying HW is ready to work
* @free_guilty: A hint to the time out handler to free the guilty job.
+ * @pause_submit: pause queuing of @work_run_job on @submit_wq
+ * @own_submit_wq: scheduler owns allocation of @submit_wq
* @dev: system &struct device
*
* One scheduler is implemented for each hardware ring.
@@ -504,13 +505,13 @@ struct drm_gpu_scheduler {
const char *name;
u32 num_rqs;
struct drm_sched_rq **sched_rq;
- wait_queue_head_t wake_up_worker;
wait_queue_head_t job_scheduled;
atomic_t hw_rq_count;
atomic64_t job_id_count;
+ struct workqueue_struct *submit_wq;
struct workqueue_struct *timeout_wq;
+ struct work_struct work_run_job;
struct delayed_work work_tdr;
- struct task_struct *thread;
struct list_head pending_list;
spinlock_t job_list_lock;
int hang_limit;
@@ -518,11 +519,14 @@ struct drm_gpu_scheduler {
atomic_t _score;
bool ready;
bool free_guilty;
+ bool pause_submit;
+ bool own_submit_wq;
struct device *dev;
};
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
+ struct workqueue_struct *submit_wq,
u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
long timeout, struct workqueue_struct *timeout_wq,
atomic_t *score, const char *name, struct device *dev);

@@ -1,275 +0,0 @@
From f7fe64ad0f22ff034f8ebcfbd7299ee9cc9b57d7 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 30 Oct 2023 20:24:37 -0700
Subject: [PATCH] drm/sched: Split free_job into own work item
Rather than calling free_job and run_job in the same work item, have a
dedicated work item for each. This aligns with the design and intended use of
work queues.
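The resulting pattern, taken from the initialization hunk below, gives each kind of work its own item on the shared submission workqueue, so each worker re-queues only the work that is still pending instead of one loop doing both:

	INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
	INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);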
v2:
- Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting
timestamp in free_job() work item (Danilo)
v3:
- Drop forward dec of drm_sched_select_entity (Boris)
- Return in drm_sched_run_job_work if entity NULL (Boris)
v4:
- Replace dequeue with peek and invert logic (Luben)
- Wrap to 100 lines (Luben)
- Update comments for *_queue / *_queue_if_ready functions (Luben)
v5:
- Drop peek argument, blindly reinit idle (Luben)
- s/drm_sched_free_job_queue_if_ready/drm_sched_free_job_queue_if_done (Luben)
- Update work_run_job & work_free_job kernel doc (Luben)
v6:
- Do not move drm_sched_select_entity in file (Luben)
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20231031032439.1558703-4-matthew.brost@intel.com
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
drivers/gpu/drm/scheduler/sched_main.c | 146 +++++++++++++++++--------
include/drm/gpu_scheduler.h | 4 +-
2 files changed, 101 insertions(+), 49 deletions(-)
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -266,6 +266,32 @@ static void drm_sched_run_job_queue(stru
}
/**
+ * drm_sched_free_job_queue - enqueue free-job work
+ * @sched: scheduler instance
+ */
+static void drm_sched_free_job_queue(struct drm_gpu_scheduler *sched)
+{
+ if (!READ_ONCE(sched->pause_submit))
+ queue_work(sched->submit_wq, &sched->work_free_job);
+}
+
+/**
+ * drm_sched_free_job_queue_if_done - enqueue free-job work if ready
+ * @sched: scheduler instance
+ */
+static void drm_sched_free_job_queue_if_done(struct drm_gpu_scheduler *sched)
+{
+ struct drm_sched_job *job;
+
+ spin_lock(&sched->job_list_lock);
+ job = list_first_entry_or_null(&sched->pending_list,
+ struct drm_sched_job, list);
+ if (job && dma_fence_is_signaled(&job->s_fence->finished))
+ drm_sched_free_job_queue(sched);
+ spin_unlock(&sched->job_list_lock);
+}
+
+/**
* drm_sched_job_done - complete a job
* @s_job: pointer to the job which is done
*
@@ -284,7 +310,7 @@ static void drm_sched_job_done(struct dr
dma_fence_get(&s_fence->finished);
drm_sched_fence_finished(s_fence, result);
dma_fence_put(&s_fence->finished);
- drm_sched_run_job_queue(sched);
+ drm_sched_free_job_queue(sched);
}
/**
@@ -951,8 +977,10 @@ drm_sched_get_cleanup_job(struct drm_gpu
typeof(*next), list);
if (next) {
- next->s_fence->scheduled.timestamp =
- dma_fence_timestamp(&job->s_fence->finished);
+ if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
+ &next->s_fence->scheduled.flags))
+ next->s_fence->scheduled.timestamp =
+ dma_fence_timestamp(&job->s_fence->finished);
/* start TO timer for next job */
drm_sched_start_timeout(sched);
}
@@ -1002,7 +1030,40 @@ drm_sched_pick_best(struct drm_gpu_sched
EXPORT_SYMBOL(drm_sched_pick_best);
/**
- * drm_sched_run_job_work - main scheduler thread
+ * drm_sched_run_job_queue_if_ready - enqueue run-job work if ready
+ * @sched: scheduler instance
+ */
+static void drm_sched_run_job_queue_if_ready(struct drm_gpu_scheduler *sched)
+{
+ if (drm_sched_select_entity(sched))
+ drm_sched_run_job_queue(sched);
+}
+
+/**
+ * drm_sched_free_job_work - worker to call free_job
+ *
+ * @w: free job work
+ */
+static void drm_sched_free_job_work(struct work_struct *w)
+{
+ struct drm_gpu_scheduler *sched =
+ container_of(w, struct drm_gpu_scheduler, work_free_job);
+ struct drm_sched_job *cleanup_job;
+
+ if (READ_ONCE(sched->pause_submit))
+ return;
+
+ cleanup_job = drm_sched_get_cleanup_job(sched);
+ if (cleanup_job) {
+ sched->ops->free_job(cleanup_job);
+
+ drm_sched_free_job_queue_if_done(sched);
+ drm_sched_run_job_queue_if_ready(sched);
+ }
+}
+
+/**
+ * drm_sched_run_job_work - worker to call run_job
*
* @w: run job work
*/
@@ -1011,65 +1072,51 @@ static void drm_sched_run_job_work(struc
struct drm_gpu_scheduler *sched =
container_of(w, struct drm_gpu_scheduler, work_run_job);
struct drm_sched_entity *entity;
- struct drm_sched_job *cleanup_job;
+ struct dma_fence *fence;
+ struct drm_sched_fence *s_fence;
+ struct drm_sched_job *sched_job;
int r;
if (READ_ONCE(sched->pause_submit))
return;
- cleanup_job = drm_sched_get_cleanup_job(sched);
entity = drm_sched_select_entity(sched);
+ if (!entity)
+ return;
- if (!entity && !cleanup_job)
+ sched_job = drm_sched_entity_pop_job(entity);
+ if (!sched_job) {
+ complete_all(&entity->entity_idle);
return; /* No more work */
+ }
- if (cleanup_job)
- sched->ops->free_job(cleanup_job);
-
- if (entity) {
- struct dma_fence *fence;
- struct drm_sched_fence *s_fence;
- struct drm_sched_job *sched_job;
-
- sched_job = drm_sched_entity_pop_job(entity);
- if (!sched_job) {
- complete_all(&entity->entity_idle);
- if (!cleanup_job)
- return; /* No more work */
- goto again;
- }
-
- s_fence = sched_job->s_fence;
-
- atomic_inc(&sched->hw_rq_count);
- drm_sched_job_begin(sched_job);
+ s_fence = sched_job->s_fence;
- trace_drm_run_job(sched_job, entity);
- fence = sched->ops->run_job(sched_job);
- complete_all(&entity->entity_idle);
- drm_sched_fence_scheduled(s_fence, fence);
+ atomic_inc(&sched->hw_rq_count);
+ drm_sched_job_begin(sched_job);
- if (!IS_ERR_OR_NULL(fence)) {
- /* Drop for original kref_init of the fence */
- dma_fence_put(fence);
+ trace_drm_run_job(sched_job, entity);
+ fence = sched->ops->run_job(sched_job);
+ complete_all(&entity->entity_idle);
+ drm_sched_fence_scheduled(s_fence, fence);
- r = dma_fence_add_callback(fence, &sched_job->cb,
- drm_sched_job_done_cb);
- if (r == -ENOENT)
- drm_sched_job_done(sched_job, fence->error);
- else if (r)
- DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
- r);
- } else {
- drm_sched_job_done(sched_job, IS_ERR(fence) ?
- PTR_ERR(fence) : 0);
- }
+ if (!IS_ERR_OR_NULL(fence)) {
+ /* Drop for original kref_init of the fence */
+ dma_fence_put(fence);
- wake_up(&sched->job_scheduled);
+ r = dma_fence_add_callback(fence, &sched_job->cb,
+ drm_sched_job_done_cb);
+ if (r == -ENOENT)
+ drm_sched_job_done(sched_job, fence->error);
+ else if (r)
+ DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
+ } else {
+ drm_sched_job_done(sched_job, IS_ERR(fence) ?
+ PTR_ERR(fence) : 0);
}
-again:
- drm_sched_run_job_queue(sched);
+ wake_up(&sched->job_scheduled);
+ drm_sched_run_job_queue_if_ready(sched);
}
/**
@@ -1153,6 +1200,7 @@ int drm_sched_init(struct drm_gpu_schedu
atomic_set(&sched->hw_rq_count, 0);
INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
+ INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);
atomic_set(&sched->_score, 0);
atomic64_set(&sched->job_id_count, 0);
sched->pause_submit = false;
@@ -1282,6 +1330,7 @@ void drm_sched_wqueue_stop(struct drm_gp
{
WRITE_ONCE(sched->pause_submit, true);
cancel_work_sync(&sched->work_run_job);
+ cancel_work_sync(&sched->work_free_job);
}
EXPORT_SYMBOL(drm_sched_wqueue_stop);
@@ -1294,5 +1343,6 @@ void drm_sched_wqueue_start(struct drm_g
{
WRITE_ONCE(sched->pause_submit, false);
queue_work(sched->submit_wq, &sched->work_run_job);
+ queue_work(sched->submit_wq, &sched->work_free_job);
}
EXPORT_SYMBOL(drm_sched_wqueue_start);
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -479,9 +479,10 @@ struct drm_sched_backend_ops {
* finished.
* @hw_rq_count: the number of jobs currently in the hardware queue.
* @job_id_count: used to assign unique id to the each job.
- * @submit_wq: workqueue used to queue @work_run_job
+ * @submit_wq: workqueue used to queue @work_run_job and @work_free_job
* @timeout_wq: workqueue used to queue @work_tdr
* @work_run_job: work which calls run_job op of each scheduler.
+ * @work_free_job: work which calls free_job op of each scheduler.
* @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
* timeout interval is over.
* @pending_list: the list of jobs which are currently in the job queue.
@@ -511,6 +512,7 @@ struct drm_gpu_scheduler {
struct workqueue_struct *submit_wq;
struct workqueue_struct *timeout_wq;
struct work_struct work_run_job;
+ struct work_struct work_free_job;
struct delayed_work work_tdr;
struct list_head pending_list;
spinlock_t job_list_lock;

@@ -1,70 +0,0 @@
From 3c6c7ca4508b6cb1a033ac954c50a1b2c97af883 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 30 Oct 2023 20:24:39 -0700
Subject: [PATCH] drm/sched: Add a helper to queue TDR immediately
Add a helper whereby a driver can invoke TDR immediately.
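A hedged usage sketch (my_gpu_hang_irq and its wiring are hypothetical): a driver that learns of a hang from an interrupt can start timeout handling at once instead of waiting out the timeout period:

static irqreturn_t my_gpu_hang_irq(int irq, void *data)
{
	struct drm_gpu_scheduler *sched = data;

	/* Run the TDR now rather than waiting for the timeout to elapse. */
	drm_sched_tdr_queue_imm(sched);

	return IRQ_HANDLED;
}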
v2:
- Drop timeout args, rename function, use mod delayed work (Luben)
v3:
- s/XE/Xe (Luben)
- present tense in commit message (Luben)
- Adjust comment for drm_sched_tdr_queue_imm (Luben)
v4:
- Adjust commit message (Luben)
Cc: Luben Tuikov <luben.tuikov@amd.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Link: https://lore.kernel.org/r/20231031032439.1558703-6-matthew.brost@intel.com
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
drivers/gpu/drm/scheduler/sched_main.c | 18 +++++++++++++++++-
include/drm/gpu_scheduler.h | 1 +
2 files changed, 18 insertions(+), 1 deletion(-)
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -336,7 +336,7 @@ static void drm_sched_start_timeout(stru
{
if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
!list_empty(&sched->pending_list))
- queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
+ mod_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
}
/**
@@ -354,6 +354,22 @@ void drm_sched_fault(struct drm_gpu_sche
EXPORT_SYMBOL(drm_sched_fault);
/**
+ * drm_sched_tdr_queue_imm: - immediately start job timeout handler
+ *
+ * @sched: scheduler for which the timeout handling should be started.
+ *
+ * Start timeout handling immediately for the named scheduler.
+ */
+void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched)
+{
+ spin_lock(&sched->job_list_lock);
+ sched->timeout = 0;
+ drm_sched_start_timeout(sched);
+ spin_unlock(&sched->job_list_lock);
+}
+EXPORT_SYMBOL(drm_sched_tdr_queue_imm);
+
+/**
* drm_sched_suspend_timeout - Suspend scheduler job timeout
*
* @sched: scheduler instance for which to suspend the timeout
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -556,6 +556,7 @@ void drm_sched_entity_modify_sched(struc
struct drm_gpu_scheduler **sched_list,
unsigned int num_sched_list);
+void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched);
void drm_sched_job_cleanup(struct drm_sched_job *job);
void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched);
bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched);

@@ -1,70 +0,0 @@
From f12af4c461fb6cd5ed7b48f8b4d09b22eb19fcc5 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Thu, 2 Nov 2023 10:55:38 +0000
Subject: [PATCH] drm/sched: Drop suffix from drm_sched_wakeup_if_can_queue
Because a) the helper is exported to other parts of the scheduler and
b) there isn't a plain drm_sched_wakeup to begin with, I think we can
drop the suffix and by doing so separate the intimate knowledge
between the scheduler components a bit better.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102105538.391648-6-tvrtko.ursulin@linux.intel.com
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
drivers/gpu/drm/scheduler/sched_entity.c | 4 ++--
drivers/gpu/drm/scheduler/sched_main.c | 4 ++--
include/drm/gpu_scheduler.h | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -372,7 +372,7 @@ static void drm_sched_entity_wakeup(stru
container_of(cb, struct drm_sched_entity, cb);
drm_sched_entity_clear_dep(f, cb);
- drm_sched_wakeup_if_can_queue(entity->rq->sched);
+ drm_sched_wakeup(entity->rq->sched);
}
/**
@@ -604,7 +604,7 @@ void drm_sched_entity_push_job(struct dr
if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
drm_sched_rq_update_fifo(entity, submit_ts);
- drm_sched_wakeup_if_can_queue(entity->rq->sched);
+ drm_sched_wakeup(entity->rq->sched);
}
}
EXPORT_SYMBOL(drm_sched_entity_push_job);
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -925,12 +925,12 @@ static bool drm_sched_can_queue(struct d
}
/**
- * drm_sched_wakeup_if_can_queue - Wake up the scheduler
+ * drm_sched_wakeup - Wake up the scheduler if it is ready to queue
* @sched: scheduler instance
*
* Wake up the scheduler if we can queue jobs.
*/
-void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
+void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
if (drm_sched_can_queue(sched))
drm_sched_run_job_queue(sched);
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -558,7 +558,7 @@ void drm_sched_entity_modify_sched(struc
void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched);
void drm_sched_job_cleanup(struct drm_sched_job *job);
-void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched);
+void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched);
void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched);
void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched);

@@ -1,69 +0,0 @@
From f3123c2590005c5ff631653d31428e40cd10c618 Mon Sep 17 00:00:00 2001
From: Luben Tuikov <ltuikov89@gmail.com>
Date: Thu, 9 Nov 2023 18:53:26 -0500
Subject: [PATCH] drm/sched: Qualify drm_sched_wakeup() by
drm_sched_entity_is_ready()
Don't "wake up" the GPU scheduler unless the entity is ready, as well as we
can queue to the scheduler, i.e. there is no point in waking up the scheduler
for the entity unless the entity is ready.
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
Fixes: bc8d6a9df99038 ("drm/sched: Don't disturb the entity when in RR-mode scheduling")
Reviewed-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110000123.72565-2-ltuikov89@gmail.com
---
drivers/gpu/drm/scheduler/sched_entity.c | 4 ++--
drivers/gpu/drm/scheduler/sched_main.c | 8 +++++---
include/drm/gpu_scheduler.h | 2 +-
3 files changed, 8 insertions(+), 6 deletions(-)
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -372,7 +372,7 @@ static void drm_sched_entity_wakeup(stru
container_of(cb, struct drm_sched_entity, cb);
drm_sched_entity_clear_dep(f, cb);
- drm_sched_wakeup(entity->rq->sched);
+ drm_sched_wakeup(entity->rq->sched, entity);
}
/**
@@ -604,7 +604,7 @@ void drm_sched_entity_push_job(struct dr
if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
drm_sched_rq_update_fifo(entity, submit_ts);
- drm_sched_wakeup(entity->rq->sched);
+ drm_sched_wakeup(entity->rq->sched, entity);
}
}
EXPORT_SYMBOL(drm_sched_entity_push_job);
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -930,10 +930,12 @@ static bool drm_sched_can_queue(struct d
*
* Wake up the scheduler if we can queue jobs.
*/
-void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
+void drm_sched_wakeup(struct drm_gpu_scheduler *sched,
+ struct drm_sched_entity *entity)
{
- if (drm_sched_can_queue(sched))
- drm_sched_run_job_queue(sched);
+ if (drm_sched_entity_is_ready(entity))
+ if (drm_sched_can_queue(sched))
+ drm_sched_run_job_queue(sched);
}
/**
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -558,7 +558,7 @@ void drm_sched_entity_modify_sched(struc
void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched);
void drm_sched_job_cleanup(struct drm_sched_job *job);
-void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
+void drm_sched_wakeup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity);
bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched);
void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched);
void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched);

@@ -1,612 +0,0 @@
From a78422e9dff366b3a46ae44caf6ec8ded9c9fc2f Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Fri, 10 Nov 2023 01:16:33 +0100
Subject: [PATCH] drm/sched: implement dynamic job-flow control
Currently, job flow control is implemented simply by limiting the number
of jobs in flight. Therefore, a scheduler is initialized with a credit
limit that corresponds to the number of jobs which can be sent to the
hardware.
This implies that for each job, drivers need to account for the maximum
job size possible in order to not overflow the ring buffer.
However, there are drivers, such as Nouveau, where the job size has a
rather large range. For such drivers it can easily happen that job
submissions not even filling the ring by 1% can block subsequent
submissions, which, in the worst case, can lead to the ring running dry.
In order to overcome this issue, allow for tracking the actual job size
instead of the number of jobs. Therefore, add a field to track a job's
credit count, which represents the number of credits a job contributes
to the scheduler's credit limit.
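An illustrative call under the new interface (job_ring_dwords() is a hypothetical per-driver helper, not part of this patch); fixed-size jobs keep passing 1, as the driver conversions below do:

	ret = drm_sched_job_init(&job->base, entity,
				 job_ring_dwords(job),	/* credits this job consumes */
				 owner);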
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110001638.71750-1-dakr@redhat.com
---
Documentation/gpu/drm-mm.rst | 6 +
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +-
drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 2 +-
drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 2 +-
drivers/gpu/drm/lima/lima_device.c | 2 +-
drivers/gpu/drm/lima/lima_sched.c | 2 +-
drivers/gpu/drm/msm/msm_gem_submit.c | 2 +-
drivers/gpu/drm/nouveau/nouveau_sched.c | 2 +-
drivers/gpu/drm/panfrost/panfrost_drv.c | 2 +-
drivers/gpu/drm/panfrost/panfrost_job.c | 2 +-
.../gpu/drm/scheduler/gpu_scheduler_trace.h | 2 +-
drivers/gpu/drm/scheduler/sched_main.c | 170 ++++++++++++++----
drivers/gpu/drm/v3d/v3d_gem.c | 2 +-
include/drm/gpu_scheduler.h | 28 ++-
14 files changed, 175 insertions(+), 51 deletions(-)
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -552,6 +552,12 @@ Overview
.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:doc: Overview
+Flow Control
+------------
+
+.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
+ :doc: Flow Control
+
Scheduler Function References
-----------------------------
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -115,7 +115,7 @@ int amdgpu_job_alloc(struct amdgpu_devic
if (!entity)
return 0;
- return drm_sched_job_init(&(*job)->base, entity, owner);
+ return drm_sched_job_init(&(*job)->base, entity, 1, owner);
}
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -535,7 +535,7 @@ int etnaviv_ioctl_gem_submit(struct drm_
ret = drm_sched_job_init(&submit->sched_job,
&ctx->sched_entity[args->pipe],
- submit->ctx);
+ 1, submit->ctx);
if (ret)
goto err_submit_put;
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1917,7 +1917,7 @@ static int etnaviv_gpu_rpm_suspend(struc
u32 idle, mask;
/* If there are any jobs in the HW queue, we're not idle */
- if (atomic_read(&gpu->sched.hw_rq_count))
+ if (atomic_read(&gpu->sched.credit_count))
return -EBUSY;
/* Check whether the hardware (except FE and MC) is idle */
--- a/drivers/gpu/drm/lima/lima_device.c
+++ b/drivers/gpu/drm/lima/lima_device.c
@@ -514,7 +514,7 @@ int lima_device_suspend(struct device *d
/* check any task running */
for (i = 0; i < lima_pipe_num; i++) {
- if (atomic_read(&ldev->pipe[i].base.hw_rq_count))
+ if (atomic_read(&ldev->pipe[i].base.credit_count))
return -EBUSY;
}
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -123,7 +123,7 @@ int lima_sched_task_init(struct lima_sch
for (i = 0; i < num_bos; i++)
drm_gem_object_get(&bos[i]->base.base);
- err = drm_sched_job_init(&task->base, &context->base, vm);
+ err = drm_sched_job_init(&task->base, &context->base, 1, vm);
if (err) {
kfree(task->bos);
return err;
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -48,7 +48,7 @@ static struct msm_gem_submit *submit_cre
return ERR_PTR(ret);
}
- ret = drm_sched_job_init(&submit->base, queue->entity, queue);
+ ret = drm_sched_job_init(&submit->base, queue->entity, 1, queue);
if (ret) {
kfree(submit->hw_fence);
kfree(submit);
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -89,7 +89,7 @@ nouveau_job_init(struct nouveau_job *job
}
- ret = drm_sched_job_init(&job->base, &entity->base, NULL);
+ ret = drm_sched_job_init(&job->base, &entity->base, 1, NULL);
if (ret)
goto err_free_chains;
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -272,7 +272,7 @@ static int panfrost_ioctl_submit(struct
ret = drm_sched_job_init(&job->base,
&file_priv->sched_entity[slot],
- NULL);
+ 1, NULL);
if (ret)
goto out_put_job;
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -939,7 +939,7 @@ int panfrost_job_is_idle(struct panfrost
for (i = 0; i < NUM_JOB_SLOTS; i++) {
/* If there are any jobs in the HW queue, we're not idle */
- if (atomic_read(&js->queue[i].sched.hw_rq_count))
+ if (atomic_read(&js->queue[i].sched.credit_count))
return false;
}
--- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
@@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(drm_sched_job,
__assign_str(name, sched_job->sched->name);
__entry->job_count = spsc_queue_count(&entity->job_queue);
__entry->hw_job_count = atomic_read(
- &sched_job->sched->hw_rq_count);
+ &sched_job->sched->credit_count);
),
TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
__entry->entity, __entry->id,
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -48,6 +48,30 @@
* through the jobs entity pointer.
*/
+/**
+ * DOC: Flow Control
+ *
+ * The DRM GPU scheduler provides a flow control mechanism to regulate the rate
+ * at which jobs fetched from scheduler entities are executed.
+ *
+ * In this context the &drm_gpu_scheduler keeps track of a driver specified
+ * credit limit representing the capacity of this scheduler and a credit count;
+ * every &drm_sched_job carries a driver specified number of credits.
+ *
+ * Once a job is executed (but not yet finished), the job's credits contribute
+ * to the scheduler's credit count until the job is finished. If by executing
+ * one more job the scheduler's credit count would exceed the scheduler's
+ * credit limit, the job won't be executed. Instead, the scheduler will wait
+ * until the credit count has decreased enough to not overflow its credit limit.
+ * This implies waiting for previously executed jobs.
+ *
+ * Optionally, drivers may register a callback (update_job_credits) provided by
+ * struct drm_sched_backend_ops to update the job's credits dynamically. The
+ * scheduler executes this callback every time the scheduler considers a job for
+ * execution and subsequently checks whether the job fits the scheduler's credit
+ * limit.
+ */
+
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
@@ -75,6 +99,51 @@ int drm_sched_policy = DRM_SCHED_POLICY_
MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
module_param_named(sched_policy, drm_sched_policy, int, 0444);
+static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched)
+{
+ u32 credits;
+
+ drm_WARN_ON(sched, check_sub_overflow(sched->credit_limit,
+ atomic_read(&sched->credit_count),
+ &credits));
+
+ return credits;
+}
+
+/**
+ * drm_sched_can_queue -- Can we queue more to the hardware?
+ * @sched: scheduler instance
+ * @entity: the scheduler entity
+ *
+ * Return true if we can push at least one more job from @entity, false
+ * otherwise.
+ */
+static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched,
+ struct drm_sched_entity *entity)
+{
+ struct drm_sched_job *s_job;
+
+ s_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
+ if (!s_job)
+ return false;
+
+ if (sched->ops->update_job_credits) {
+ s_job->credits = sched->ops->update_job_credits(s_job);
+
+ drm_WARN(sched, !s_job->credits,
+ "Jobs with zero credits bypass job-flow control.\n");
+ }
+
+ /* If a job exceeds the credit limit, truncate it to the credit limit
+ * itself to guarantee forward progress.
+ */
+ if (drm_WARN(sched, s_job->credits > sched->credit_limit,
+ "Jobs may not exceed the credit limit, truncate.\n"))
+ s_job->credits = sched->credit_limit;
+
+ return drm_sched_available_credits(sched) >= s_job->credits;
+}
+
static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a,
const struct rb_node *b)
{
@@ -186,12 +255,18 @@ void drm_sched_rq_remove_entity(struct d
/**
* drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
*
+ * @sched: the gpu scheduler
* @rq: scheduler run queue to check.
*
- * Try to find a ready entity, returns NULL if none found.
+ * Try to find the next ready entity.
+ *
+ * Return an entity if one is found; return an error-pointer (!NULL) if an
+ * entity was ready, but the scheduler had insufficient credits to accommodate
+ * its job; return NULL if no ready entity was found.
*/
static struct drm_sched_entity *
-drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
+drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched,
+ struct drm_sched_rq *rq)
{
struct drm_sched_entity *entity;
@@ -201,6 +276,14 @@ drm_sched_rq_select_entity_rr(struct drm
if (entity) {
list_for_each_entry_continue(entity, &rq->entities, list) {
if (drm_sched_entity_is_ready(entity)) {
+ /* If we can't queue yet, preserve the current
+ * entity in terms of fairness.
+ */
+ if (!drm_sched_can_queue(sched, entity)) {
+ spin_unlock(&rq->lock);
+ return ERR_PTR(-ENOSPC);
+ }
+
rq->current_entity = entity;
reinit_completion(&entity->entity_idle);
spin_unlock(&rq->lock);
@@ -210,8 +293,15 @@ drm_sched_rq_select_entity_rr(struct drm
}
list_for_each_entry(entity, &rq->entities, list) {
-
if (drm_sched_entity_is_ready(entity)) {
+ /* If we can't queue yet, preserve the current entity in
+ * terms of fairness.
+ */
+ if (!drm_sched_can_queue(sched, entity)) {
+ spin_unlock(&rq->lock);
+ return ERR_PTR(-ENOSPC);
+ }
+
rq->current_entity = entity;
reinit_completion(&entity->entity_idle);
spin_unlock(&rq->lock);
@@ -230,12 +320,18 @@ drm_sched_rq_select_entity_rr(struct drm
/**
* drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
*
+ * @sched: the gpu scheduler
* @rq: scheduler run queue to check.
*
- * Find oldest waiting ready entity, returns NULL if none found.
+ * Find oldest waiting ready entity.
+ *
+ * Return an entity if one is found; return an error-pointer (!NULL) if an
+ * entity was ready, but the scheduler had insufficient credits to accommodate
+ * its job; return NULL if no ready entity was found.
*/
static struct drm_sched_entity *
-drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
+drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched,
+ struct drm_sched_rq *rq)
{
struct rb_node *rb;
@@ -245,6 +341,14 @@ drm_sched_rq_select_entity_fifo(struct d
entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
if (drm_sched_entity_is_ready(entity)) {
+ /* If we can't queue yet, preserve the current entity in
+ * terms of fairness.
+ */
+ if (!drm_sched_can_queue(sched, entity)) {
+ spin_unlock(&rq->lock);
+ return ERR_PTR(-ENOSPC);
+ }
+
rq->current_entity = entity;
reinit_completion(&entity->entity_idle);
break;
@@ -302,7 +406,7 @@ static void drm_sched_job_done(struct dr
struct drm_sched_fence *s_fence = s_job->s_fence;
struct drm_gpu_scheduler *sched = s_fence->sched;
- atomic_dec(&sched->hw_rq_count);
+ atomic_sub(s_job->credits, &sched->credit_count);
atomic_dec(sched->score);
trace_drm_sched_process_job(s_fence);
@@ -519,7 +623,7 @@ void drm_sched_stop(struct drm_gpu_sched
&s_job->cb)) {
dma_fence_put(s_job->s_fence->parent);
s_job->s_fence->parent = NULL;
- atomic_dec(&sched->hw_rq_count);
+ atomic_sub(s_job->credits, &sched->credit_count);
} else {
/*
* remove job from pending_list.
@@ -580,7 +684,7 @@ void drm_sched_start(struct drm_gpu_sche
list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
struct dma_fence *fence = s_job->s_fence->parent;
- atomic_inc(&sched->hw_rq_count);
+ atomic_add(s_job->credits, &sched->credit_count);
if (!full_recovery)
continue;
@@ -664,6 +768,8 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs);
* drm_sched_job_init - init a scheduler job
* @job: scheduler job to init
* @entity: scheduler entity to use
+ * @credits: the number of credits this job contributes to the scheduler's
+ * credit limit
* @owner: job owner for debugging
*
* Refer to drm_sched_entity_push_job() documentation
@@ -681,7 +787,7 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs);
*/
int drm_sched_job_init(struct drm_sched_job *job,
struct drm_sched_entity *entity,
- void *owner)
+ u32 credits, void *owner)
{
if (!entity->rq) {
/* This will most likely be followed by missing frames
@@ -700,7 +806,13 @@ int drm_sched_job_init(struct drm_sched_
*/
memset(job, 0, sizeof(*job));
+ if (unlikely(!credits)) {
+ pr_err("*ERROR* %s: credits cannot be 0!\n", __func__);
+ return -EINVAL;
+ }
+
job->entity = entity;
+ job->credits = credits;
job->s_fence = drm_sched_fence_alloc(entity, owner);
if (!job->s_fence)
return -ENOMEM;
@@ -913,20 +1025,9 @@ void drm_sched_job_cleanup(struct drm_sc
EXPORT_SYMBOL(drm_sched_job_cleanup);
/**
- * drm_sched_can_queue -- Can we queue more to the hardware?
- * @sched: scheduler instance
- *
- * Return true if we can push more jobs to the hw, otherwise false.
- */
-static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
-{
- return atomic_read(&sched->hw_rq_count) <
- sched->hw_submission_limit;
-}
-
-/**
* drm_sched_wakeup - Wake up the scheduler if it is ready to queue
* @sched: scheduler instance
+ * @entity: the scheduler entity
*
* Wake up the scheduler if we can queue jobs.
*/
@@ -934,7 +1035,7 @@ void drm_sched_wakeup(struct drm_gpu_sch
struct drm_sched_entity *entity)
{
if (drm_sched_entity_is_ready(entity))
- if (drm_sched_can_queue(sched))
+ if (drm_sched_can_queue(sched, entity))
drm_sched_run_job_queue(sched);
}
@@ -943,7 +1044,11 @@ void drm_sched_wakeup(struct drm_gpu_sch
*
* @sched: scheduler instance
*
- * Returns the entity to process or NULL if none are found.
+ * Return an entity to process or NULL if none are found.
+ *
+ * Note that we break out of the for-loop when "entity" is non-null, which can
+ * also be an error-pointer--this assures we don't process lower priority
+ * run-queues. See comments in the respectively called functions.
*/
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
@@ -951,19 +1056,16 @@ drm_sched_select_entity(struct drm_gpu_s
struct drm_sched_entity *entity;
int i;
- if (!drm_sched_can_queue(sched))
- return NULL;
-
/* Kernel run queue has higher priority than normal run queue*/
for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
- drm_sched_rq_select_entity_fifo(sched->sched_rq[i]) :
- drm_sched_rq_select_entity_rr(sched->sched_rq[i]);
+ drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) :
+ drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]);
if (entity)
break;
}
- return entity;
+ return IS_ERR(entity) ? NULL : entity;
}
/**
@@ -1110,7 +1212,7 @@ static void drm_sched_run_job_work(struc
s_fence = sched_job->s_fence;
- atomic_inc(&sched->hw_rq_count);
+ atomic_add(sched_job->credits, &sched->credit_count);
drm_sched_job_begin(sched_job);
trace_drm_run_job(sched_job, entity);
@@ -1145,7 +1247,7 @@ static void drm_sched_run_job_work(struc
* @submit_wq: workqueue to use for submission. If NULL, an ordered wq is
* allocated and used
* @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT
- * @hw_submission: number of hw submissions that can be in flight
+ * @credit_limit: the number of credits this scheduler can hold from all jobs
* @hang_limit: number of times to allow a job to hang before dropping it
* @timeout: timeout value in jiffies for the scheduler
* @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
@@ -1159,14 +1261,14 @@ static void drm_sched_run_job_work(struc
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
struct workqueue_struct *submit_wq,
- u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
+ u32 num_rqs, u32 credit_limit, unsigned int hang_limit,
long timeout, struct workqueue_struct *timeout_wq,
atomic_t *score, const char *name, struct device *dev)
{
int i, ret;
sched->ops = ops;
- sched->hw_submission_limit = hw_submission;
+ sched->credit_limit = credit_limit;
sched->name = name;
sched->timeout = timeout;
sched->timeout_wq = timeout_wq ? : system_wq;
@@ -1215,7 +1317,7 @@ int drm_sched_init(struct drm_gpu_schedu
init_waitqueue_head(&sched->job_scheduled);
INIT_LIST_HEAD(&sched->pending_list);
spin_lock_init(&sched->job_list_lock);
- atomic_set(&sched->hw_rq_count, 0);
+ atomic_set(&sched->credit_count, 0);
INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -417,7 +417,7 @@ v3d_job_init(struct v3d_dev *v3d, struct
job->free = free;
ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
- v3d_priv);
+ 1, v3d_priv);
if (ret)
goto fail;
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -320,6 +320,7 @@ struct drm_sched_fence *to_drm_sched_fen
* @sched: the scheduler instance on which this job is scheduled.
* @s_fence: contains the fences for the scheduling of job.
* @finish_cb: the callback for the finished fence.
+ * @credits: the number of credits this job contributes to the scheduler
* @work: Helper to reschedule job kill to different context.
* @id: a unique id assigned to each job scheduled on the scheduler.
* @karma: increment on every hang caused by this job. If this exceeds the hang
@@ -339,6 +340,8 @@ struct drm_sched_job {
struct drm_gpu_scheduler *sched;
struct drm_sched_fence *s_fence;
+ u32 credits;
+
/*
* work is used only after finish_cb has been used and will not be
* accessed anymore.
@@ -462,13 +465,27 @@ struct drm_sched_backend_ops {
* and it's time to clean it up.
*/
void (*free_job)(struct drm_sched_job *sched_job);
+
+ /**
+ * @update_job_credits: Called when the scheduler is considering this
+ * job for execution.
+ *
+ * This callback returns the number of credits the job would take if
+ * pushed to the hardware. Drivers may use this to dynamically update
+ * the job's credit count. For instance, deduct the number of credits
+ * for already signalled native fences.
+ *
+ * This callback is optional.
+ */
+ u32 (*update_job_credits)(struct drm_sched_job *sched_job);
};
/**
* struct drm_gpu_scheduler - scheduler instance-specific data
*
* @ops: backend operations provided by the driver.
- * @hw_submission_limit: the max size of the hardware queue.
+ * @credit_limit: the credit limit of this scheduler
+ * @credit_count: the current credit count of this scheduler
* @timeout: the time after which a job is removed from the scheduler.
* @name: name of the ring for which this scheduler is being used.
* @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT,
@@ -477,7 +494,6 @@ struct drm_sched_backend_ops {
* @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
* waits on this wait queue until all the scheduled jobs are
* finished.
- * @hw_rq_count: the number of jobs currently in the hardware queue.
* @job_id_count: used to assign unique id to the each job.
* @submit_wq: workqueue used to queue @work_run_job and @work_free_job
* @timeout_wq: workqueue used to queue @work_tdr
@@ -501,13 +517,13 @@ struct drm_sched_backend_ops {
*/
struct drm_gpu_scheduler {
const struct drm_sched_backend_ops *ops;
- uint32_t hw_submission_limit;
+ u32 credit_limit;
+ atomic_t credit_count;
long timeout;
const char *name;
u32 num_rqs;
struct drm_sched_rq **sched_rq;
wait_queue_head_t job_scheduled;
- atomic_t hw_rq_count;
atomic64_t job_id_count;
struct workqueue_struct *submit_wq;
struct workqueue_struct *timeout_wq;
@@ -529,14 +545,14 @@ struct drm_gpu_scheduler {
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
struct workqueue_struct *submit_wq,
- u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
+ u32 num_rqs, u32 credit_limit, unsigned int hang_limit,
long timeout, struct workqueue_struct *timeout_wq,
atomic_t *score, const char *name, struct device *dev);
void drm_sched_fini(struct drm_gpu_scheduler *sched);
int drm_sched_job_init(struct drm_sched_job *job,
struct drm_sched_entity *entity,
- void *owner);
+ u32 credits, void *owner);
void drm_sched_job_arm(struct drm_sched_job *job);
int drm_sched_job_add_dependency(struct drm_sched_job *job,
struct dma_fence *fence);

View File

@ -1,129 +0,0 @@
From 17b226dcf80ce79d02f4f0b08813d8848885b986 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Fri, 24 Nov 2023 15:24:33 +0100
Subject: [PATCH] iommu: Allow passing custom allocators to pgtable drivers
This will be useful for GPU drivers who want to keep page tables in a
pool so they can:
- keep freed page tables in a free pool and speed-up upcoming page
table allocations
- batch page table allocation instead of allocating one page at a time
- pre-reserve pages for page tables needed for map/unmap operations,
to ensure map/unmap operations don't try to allocate memory in paths
where they're not allowed to block or fail
It might also be valuable for other aspects of GPU and similar
use-cases, like fine-grained memory accounting and resource limiting.
We will extend the Arm LPAE format to support custom allocators in a
separate commit.
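As a sketch of the consumer side (not part of this patch; my_pt_alloc,
my_pt_pool_* and struct my_gpu are hypothetical), a GPU driver would
install both hooks in its io_pgtable_cfg before calling
alloc_io_pgtable_ops():

/* Both hooks must be set together, and the chosen format must
 * advertise IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, or the configuration
 * is rejected.
 */
static void *my_pt_alloc(void *cookie, size_t size, gfp_t gfp)
{
	/* hand out zeroed, DMA-able pages from a driver-managed pool */
	return my_pt_pool_get(cookie, size, gfp);
}

static void my_pt_free(void *cookie, void *pages, size_t size)
{
	my_pt_pool_put(cookie, pages, size);
}

static struct io_pgtable_ops *my_pgtable_init(struct my_gpu *gpu)
{
	struct io_pgtable_cfg cfg = gpu->pgtbl_cfg;

	cfg.alloc = my_pt_alloc;
	cfg.free = my_pt_free;
	return alloc_io_pgtable_ops(ARM_64_LPAE_S1, &cfg, gpu);
}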
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/20231124142434.1577550-2-boris.brezillon@collabora.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/io-pgtable.c | 23 +++++++++++++++++++++++
include/linux/io-pgtable.h | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+)
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -34,6 +34,26 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMT
#endif
};
+static int check_custom_allocator(enum io_pgtable_fmt fmt,
+ struct io_pgtable_cfg *cfg)
+{
+ /* No custom allocator, no need to check the format. */
+ if (!cfg->alloc && !cfg->free)
+ return 0;
+
+ /* When passing a custom allocator, both the alloc and free
+ * functions should be provided.
+ */
+ if (!cfg->alloc || !cfg->free)
+ return -EINVAL;
+
+ /* Make sure the format supports custom allocators. */
+ if (io_pgtable_init_table[fmt]->caps & IO_PGTABLE_CAP_CUSTOM_ALLOCATOR)
+ return 0;
+
+ return -EINVAL;
+}
+
struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
struct io_pgtable_cfg *cfg,
void *cookie)
@@ -44,6 +64,9 @@ struct io_pgtable_ops *alloc_io_pgtable_
if (fmt >= IO_PGTABLE_NUM_FMTS)
return NULL;
+ if (check_custom_allocator(fmt, cfg))
+ return NULL;
+
fns = io_pgtable_init_table[fmt];
if (!fns)
return NULL;
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -100,6 +100,30 @@ struct io_pgtable_cfg {
const struct iommu_flush_ops *tlb;
struct device *iommu_dev;
+ /**
+ * @alloc: Custom page allocator.
+ *
+ * Optional hook used to allocate page tables. If this function is NULL,
+ * @free must be NULL too.
+ *
+ * Memory returned should be zeroed and suitable for dma_map_single() and
+ * virt_to_phys().
+ *
+ * Not all formats support custom page allocators. Before considering
+ * passing a non-NULL value, make sure the chosen page format supports
+ * this feature.
+ */
+ void *(*alloc)(void *cookie, size_t size, gfp_t gfp);
+
+ /**
+ * @free: Custom page de-allocator.
+ *
+ * Optional hook used to free page tables allocated with the @alloc
+ * hook. Must be non-NULL if @alloc is not NULL, must be NULL
+ * otherwise.
+ */
+ void (*free)(void *cookie, void *pages, size_t size);
+
/* Low-level data specific to the table format */
union {
struct {
@@ -238,15 +262,25 @@ io_pgtable_tlb_add_page(struct io_pgtabl
}
/**
+ * enum io_pgtable_caps - IO page table backend capabilities.
+ */
+enum io_pgtable_caps {
+ /** @IO_PGTABLE_CAP_CUSTOM_ALLOCATOR: Backend accepts custom page table allocators. */
+ IO_PGTABLE_CAP_CUSTOM_ALLOCATOR = BIT(0),
+};
+
+/**
* struct io_pgtable_init_fns - Alloc/free a set of page tables for a
* particular format.
*
* @alloc: Allocate a set of page tables described by cfg.
* @free: Free the page tables associated with iop.
+ * @caps: Combination of @io_pgtable_caps flags encoding the backend capabilities.
*/
struct io_pgtable_init_fns {
struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie);
void (*free)(struct io_pgtable *iop);
+ u32 caps;
};
extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns;

View File

@ -1,68 +0,0 @@
From cf41cebf9dc8143ca7bb0aabb7e0053e16f0515a Mon Sep 17 00:00:00 2001
From: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Date: Fri, 19 Jan 2024 10:05:57 +0100
Subject: [PATCH] drm/exec, drm/gpuvm: Prefer u32 over uint32_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The relatively recently introduced drm/exec utility was using uint32_t
in its interface, which was then also carried over to drm/gpuvm.
Prefer u32 in new code and update drm/exec and drm/gpuvm accordingly.
Cc: Christian König <christian.koenig@amd.com>
Cc: Danilo Krummrich <dakr@redhat.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240119090557.6360-1-thomas.hellstrom@linux.intel.com
---
drivers/gpu/drm/drm_exec.c | 2 +-
include/drm/drm_exec.h | 4 ++--
include/drm/drm_gpuvm.h | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
--- a/drivers/gpu/drm/drm_exec.c
+++ b/drivers/gpu/drm/drm_exec.c
@@ -72,7 +72,7 @@ static void drm_exec_unlock_all(struct d
*
* Initialize the object and make sure that we can track locked objects.
*/
-void drm_exec_init(struct drm_exec *exec, uint32_t flags)
+void drm_exec_init(struct drm_exec *exec, u32 flags)
{
exec->flags = flags;
exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL);
--- a/include/drm/drm_exec.h
+++ b/include/drm/drm_exec.h
@@ -18,7 +18,7 @@ struct drm_exec {
/**
* @flags: Flags to control locking behavior
*/
- uint32_t flags;
+ u32 flags;
/**
* @ticket: WW ticket used for acquiring locks
@@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended
return !!exec->contended;
}
-void drm_exec_init(struct drm_exec *exec, uint32_t flags);
+void drm_exec_init(struct drm_exec *exec, u32 flags);
void drm_exec_fini(struct drm_exec *exec);
bool drm_exec_cleanup(struct drm_exec *exec);
int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj);
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -514,7 +514,7 @@ struct drm_gpuvm_exec {
/**
* @flags: the flags for the struct drm_exec
*/
- uint32_t flags;
+ u32 flags;
/**
* @vm: the &drm_gpuvm to lock its DMA reservations

View File

@ -1,280 +0,0 @@
From 546b366600ef34847702f43bb2d22f914d19eae0 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:16 +0100
Subject: [PATCH] drm/panthor: Add GPU register definitions
Those are the registers directly accessible through the MMIO range.
FW registers are exposed in panthor_fw.h.
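For orientation, decoding a raw GPU_ID value with the extraction macros
added below might look like this (a sketch; struct panthor_device comes
from elsewhere in the series):

static void my_decode_gpu_id(struct panthor_device *ptdev)
{
	u32 gpu_id = gpu_read(ptdev, GPU_ID);
	u32 arch_major = GPU_ARCH_MAJOR(gpu_id);  /* bits 31:28 */
	u32 arch_minor = GPU_ARCH_MINOR(gpu_id);  /* bits 27:24 */
	u32 prod_major = GPU_PROD_MAJOR(gpu_id);  /* bits 19:16 */

	pr_info("arch %u.%u, product major %u\n",
		arch_major, arch_minor, prod_major);
}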
v6:
- Add Maxime's and Heiko's acks
v4:
- Add the CORE_FEATURES register (needed for GPU variants)
- Add Steve's R-b
v3:
- Add macros to extract GPU ID info
- Formatting changes
- Remove AS_TRANSCFG_ADRMODE_LEGACY - it doesn't exist post-CSF
- Remove CSF_GPU_LATEST_FLUSH_ID_DEFAULT
- Add GPU_L2_FEATURES_LINE_SIZE for extracting the GPU cache line size
Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-3-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_regs.h | 239 +++++++++++++++++++++++++
1 file changed, 239 insertions(+)
create mode 100644 drivers/gpu/drm/panthor/panthor_regs.h
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_regs.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2023 Collabora ltd. */
+/*
+ * Register definitions based on mali_kbase_gpu_regmap.h and
+ * mali_kbase_gpu_regmap_csf.h
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ */
+#ifndef __PANTHOR_REGS_H__
+#define __PANTHOR_REGS_H__
+
+#define GPU_ID 0x0
+#define GPU_ARCH_MAJOR(x) ((x) >> 28)
+#define GPU_ARCH_MINOR(x) (((x) & GENMASK(27, 24)) >> 24)
+#define GPU_ARCH_REV(x) (((x) & GENMASK(23, 20)) >> 20)
+#define GPU_PROD_MAJOR(x) (((x) & GENMASK(19, 16)) >> 16)
+#define GPU_VER_MAJOR(x) (((x) & GENMASK(15, 12)) >> 12)
+#define GPU_VER_MINOR(x) (((x) & GENMASK(11, 4)) >> 4)
+#define GPU_VER_STATUS(x) ((x) & GENMASK(3, 0))
+
+#define GPU_L2_FEATURES 0x4
+#define GPU_L2_FEATURES_LINE_SIZE(x) (1 << ((x) & GENMASK(7, 0)))
+
+#define GPU_CORE_FEATURES 0x8
+
+#define GPU_TILER_FEATURES 0xC
+#define GPU_MEM_FEATURES 0x10
+#define GROUPS_L2_COHERENT BIT(0)
+
+#define GPU_MMU_FEATURES 0x14
+#define GPU_MMU_FEATURES_VA_BITS(x) ((x) & GENMASK(7, 0))
+#define GPU_MMU_FEATURES_PA_BITS(x) (((x) >> 8) & GENMASK(7, 0))
+#define GPU_AS_PRESENT 0x18
+#define GPU_CSF_ID 0x1C
+
+#define GPU_INT_RAWSTAT 0x20
+#define GPU_INT_CLEAR 0x24
+#define GPU_INT_MASK 0x28
+#define GPU_INT_STAT 0x2c
+#define GPU_IRQ_FAULT BIT(0)
+#define GPU_IRQ_PROTM_FAULT BIT(1)
+#define GPU_IRQ_RESET_COMPLETED BIT(8)
+#define GPU_IRQ_POWER_CHANGED BIT(9)
+#define GPU_IRQ_POWER_CHANGED_ALL BIT(10)
+#define GPU_IRQ_CLEAN_CACHES_COMPLETED BIT(17)
+#define GPU_IRQ_DOORBELL_MIRROR BIT(18)
+#define GPU_IRQ_MCU_STATUS_CHANGED BIT(19)
+#define GPU_CMD 0x30
+#define GPU_CMD_DEF(type, payload) ((type) | ((payload) << 8))
+#define GPU_SOFT_RESET GPU_CMD_DEF(1, 1)
+#define GPU_HARD_RESET GPU_CMD_DEF(1, 2)
+#define CACHE_CLEAN BIT(0)
+#define CACHE_INV BIT(1)
+#define GPU_FLUSH_CACHES(l2, lsc, oth) \
+ GPU_CMD_DEF(4, ((l2) << 0) | ((lsc) << 4) | ((oth) << 8))
+
+#define GPU_STATUS 0x34
+#define GPU_STATUS_ACTIVE BIT(0)
+#define GPU_STATUS_PWR_ACTIVE BIT(1)
+#define GPU_STATUS_PAGE_FAULT BIT(4)
+#define GPU_STATUS_PROTM_ACTIVE BIT(7)
+#define GPU_STATUS_DBG_ENABLED BIT(8)
+
+#define GPU_FAULT_STATUS 0x3C
+#define GPU_FAULT_ADDR_LO 0x40
+#define GPU_FAULT_ADDR_HI 0x44
+
+#define GPU_PWR_KEY 0x50
+#define GPU_PWR_KEY_UNLOCK 0x2968A819
+#define GPU_PWR_OVERRIDE0 0x54
+#define GPU_PWR_OVERRIDE1 0x58
+
+#define GPU_TIMESTAMP_OFFSET_LO 0x88
+#define GPU_TIMESTAMP_OFFSET_HI 0x8C
+#define GPU_CYCLE_COUNT_LO 0x90
+#define GPU_CYCLE_COUNT_HI 0x94
+#define GPU_TIMESTAMP_LO 0x98
+#define GPU_TIMESTAMP_HI 0x9C
+
+#define GPU_THREAD_MAX_THREADS 0xA0
+#define GPU_THREAD_MAX_WORKGROUP_SIZE 0xA4
+#define GPU_THREAD_MAX_BARRIER_SIZE 0xA8
+#define GPU_THREAD_FEATURES 0xAC
+
+#define GPU_TEXTURE_FEATURES(n) (0xB0 + ((n) * 4))
+
+#define GPU_SHADER_PRESENT_LO 0x100
+#define GPU_SHADER_PRESENT_HI 0x104
+#define GPU_TILER_PRESENT_LO 0x110
+#define GPU_TILER_PRESENT_HI 0x114
+#define GPU_L2_PRESENT_LO 0x120
+#define GPU_L2_PRESENT_HI 0x124
+
+#define SHADER_READY_LO 0x140
+#define SHADER_READY_HI 0x144
+#define TILER_READY_LO 0x150
+#define TILER_READY_HI 0x154
+#define L2_READY_LO 0x160
+#define L2_READY_HI 0x164
+
+#define SHADER_PWRON_LO 0x180
+#define SHADER_PWRON_HI 0x184
+#define TILER_PWRON_LO 0x190
+#define TILER_PWRON_HI 0x194
+#define L2_PWRON_LO 0x1A0
+#define L2_PWRON_HI 0x1A4
+
+#define SHADER_PWROFF_LO 0x1C0
+#define SHADER_PWROFF_HI 0x1C4
+#define TILER_PWROFF_LO 0x1D0
+#define TILER_PWROFF_HI 0x1D4
+#define L2_PWROFF_LO 0x1E0
+#define L2_PWROFF_HI 0x1E4
+
+#define SHADER_PWRTRANS_LO 0x200
+#define SHADER_PWRTRANS_HI 0x204
+#define TILER_PWRTRANS_LO 0x210
+#define TILER_PWRTRANS_HI 0x214
+#define L2_PWRTRANS_LO 0x220
+#define L2_PWRTRANS_HI 0x224
+
+#define SHADER_PWRACTIVE_LO 0x240
+#define SHADER_PWRACTIVE_HI 0x244
+#define TILER_PWRACTIVE_LO 0x250
+#define TILER_PWRACTIVE_HI 0x254
+#define L2_PWRACTIVE_LO 0x260
+#define L2_PWRACTIVE_HI 0x264
+
+#define GPU_REVID 0x280
+
+#define GPU_COHERENCY_FEATURES 0x300
+#define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name)
+
+#define GPU_COHERENCY_PROTOCOL 0x304
+#define GPU_COHERENCY_ACE 0
+#define GPU_COHERENCY_ACE_LITE 1
+#define GPU_COHERENCY_NONE 31
+
+#define MCU_CONTROL 0x700
+#define MCU_CONTROL_ENABLE 1
+#define MCU_CONTROL_AUTO 2
+#define MCU_CONTROL_DISABLE 0
+
+#define MCU_STATUS 0x704
+#define MCU_STATUS_DISABLED 0
+#define MCU_STATUS_ENABLED 1
+#define MCU_STATUS_HALT 2
+#define MCU_STATUS_FATAL 3
+
+/* Job Control regs */
+#define JOB_INT_RAWSTAT 0x1000
+#define JOB_INT_CLEAR 0x1004
+#define JOB_INT_MASK 0x1008
+#define JOB_INT_STAT 0x100c
+#define JOB_INT_GLOBAL_IF BIT(31)
+#define JOB_INT_CSG_IF(x) BIT(x)
+
+/* MMU regs */
+#define MMU_INT_RAWSTAT 0x2000
+#define MMU_INT_CLEAR 0x2004
+#define MMU_INT_MASK 0x2008
+#define MMU_INT_STAT 0x200c
+
+/* AS_COMMAND register commands */
+
+#define MMU_BASE 0x2400
+#define MMU_AS_SHIFT 6
+#define MMU_AS(as) (MMU_BASE + ((as) << MMU_AS_SHIFT))
+
+#define AS_TRANSTAB_LO(as) (MMU_AS(as) + 0x0)
+#define AS_TRANSTAB_HI(as) (MMU_AS(as) + 0x4)
+#define AS_MEMATTR_LO(as) (MMU_AS(as) + 0x8)
+#define AS_MEMATTR_HI(as) (MMU_AS(as) + 0xC)
+#define AS_MEMATTR_AARCH64_INNER_ALLOC_IMPL (2 << 2)
+#define AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(w, r) ((3 << 2) | \
+ ((w) ? BIT(0) : 0) | \
+ ((r) ? BIT(1) : 0))
+#define AS_MEMATTR_AARCH64_SH_MIDGARD_INNER (0 << 4)
+#define AS_MEMATTR_AARCH64_SH_CPU_INNER (1 << 4)
+#define AS_MEMATTR_AARCH64_SH_CPU_INNER_SHADER_COH (2 << 4)
+#define AS_MEMATTR_AARCH64_SHARED (0 << 6)
+#define AS_MEMATTR_AARCH64_INNER_OUTER_NC (1 << 6)
+#define AS_MEMATTR_AARCH64_INNER_OUTER_WB (2 << 6)
+#define AS_MEMATTR_AARCH64_FAULT (3 << 6)
+#define AS_LOCKADDR_LO(as) (MMU_AS(as) + 0x10)
+#define AS_LOCKADDR_HI(as) (MMU_AS(as) + 0x14)
+#define AS_COMMAND(as) (MMU_AS(as) + 0x18)
+#define AS_COMMAND_NOP 0
+#define AS_COMMAND_UPDATE 1
+#define AS_COMMAND_LOCK 2
+#define AS_COMMAND_UNLOCK 3
+#define AS_COMMAND_FLUSH_PT 4
+#define AS_COMMAND_FLUSH_MEM 5
+#define AS_LOCK_REGION_MIN_SIZE (1ULL << 15)
+#define AS_FAULTSTATUS(as) (MMU_AS(as) + 0x1C)
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << 8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0 << 8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1 << 8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2 << 8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3 << 8)
+#define AS_FAULTADDRESS_LO(as) (MMU_AS(as) + 0x20)
+#define AS_FAULTADDRESS_HI(as) (MMU_AS(as) + 0x24)
+#define AS_STATUS(as) (MMU_AS(as) + 0x28)
+#define AS_STATUS_AS_ACTIVE BIT(0)
+#define AS_TRANSCFG_LO(as) (MMU_AS(as) + 0x30)
+#define AS_TRANSCFG_HI(as) (MMU_AS(as) + 0x34)
+#define AS_TRANSCFG_ADRMODE_UNMAPPED (1 << 0)
+#define AS_TRANSCFG_ADRMODE_IDENTITY (2 << 0)
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K (6 << 0)
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K (8 << 0)
+#define AS_TRANSCFG_INA_BITS(x) ((x) << 6)
+#define AS_TRANSCFG_OUTA_BITS(x) ((x) << 14)
+#define AS_TRANSCFG_SL_CONCAT BIT(22)
+#define AS_TRANSCFG_PTW_MEMATTR_NC (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WB (2 << 24)
+#define AS_TRANSCFG_PTW_SH_NS (0 << 28)
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
+#define AS_TRANSCFG_PTW_RA BIT(30)
+#define AS_TRANSCFG_DISABLE_HIER_AP BIT(33)
+#define AS_TRANSCFG_DISABLE_AF_FAULT BIT(34)
+#define AS_TRANSCFG_WXN BIT(35)
+#define AS_TRANSCFG_XREADABLE BIT(36)
+#define AS_FAULTEXTRA_LO(as) (MMU_AS(as) + 0x38)
+#define AS_FAULTEXTRA_HI(as) (MMU_AS(as) + 0x3C)
+
+#define CSF_GPU_LATEST_FLUSH_ID 0x10000
+
+#define CSF_DOORBELL(i) (0x80000 + ((i) * 0x10000))
+#define CSF_GLB_DOORBELL_ID 0
+
+#define gpu_write(dev, reg, data) \
+ writel(data, (dev)->iomem + (reg))
+
+#define gpu_read(dev, reg) \
+ readl((dev)->iomem + (reg))
+
+#endif

View File

@ -1,593 +0,0 @@
From 5cd894e258c4b0b92b9b475309cea244e590d194 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:18 +0100
Subject: [PATCH] drm/panthor: Add the GPU logical block
Handles everything that's not related to the FW, the MMU or the
scheduler. This is the block dealing with the GPU property retrieval,
the GPU block power on/off logic, and some global operations, like
global cache flushing.
v6:
- Add Maxime's and Heiko's acks
v5:
- Fix GPU_MODEL() kernel doc
- Fix test in panthor_gpu_block_power_off()
- Add Steve's R-b
v4:
- Expose CORE_FEATURES through DEV_QUERY
v3:
- Add acks for the MIT/GPL2 relicensing
- Use macros to extract GPU ID info
- Make sure we reset clear pending_reqs bits when wait_event_timeout()
times out but the corresponding bit is cleared in GPU_INT_RAWSTAT
(can happen if the IRQ is masked or HW takes too long to call the IRQ
handler)
- GPU_MODEL now takes separate arch and product majors to be more
readable.
- Drop GPU_IRQ_MCU_STATUS_CHANGED from interrupt mask.
- Handle GPU_IRQ_PROTM_FAULT correctly (don't output registers that are
not updated for protected interrupts).
- Minor code tidy ups
Cc: Alexey Sheplyakov <asheplyakov@basealt.ru> # MIT+GPL2 relicensing
Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-5-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_gpu.c | 482 ++++++++++++++++++++++++++
drivers/gpu/drm/panthor/panthor_gpu.h | 52 +++
2 files changed, 534 insertions(+)
create mode 100644 drivers/gpu/drm/panthor/panthor_gpu.c
create mode 100644 drivers/gpu/drm/panthor/panthor_gpu.h
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
+/* Copyright 2019 Collabora ltd. */
+
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+
+#include "panthor_device.h"
+#include "panthor_gpu.h"
+#include "panthor_regs.h"
+
+/**
+ * struct panthor_gpu - GPU block management data.
+ */
+struct panthor_gpu {
+ /** @irq: GPU irq. */
+ struct panthor_irq irq;
+
+ /** @reqs_lock: Lock protecting access to pending_reqs. */
+ spinlock_t reqs_lock;
+
+ /** @pending_reqs: Pending GPU requests. */
+ u32 pending_reqs;
+
+ /** @reqs_acked: GPU request wait queue. */
+ wait_queue_head_t reqs_acked;
+};
+
+/**
+ * struct panthor_model - GPU model description
+ */
+struct panthor_model {
+ /** @name: Model name. */
+ const char *name;
+
+ /** @arch_major: Major version number of architecture. */
+ u8 arch_major;
+
+ /** @product_major: Major version number of product. */
+ u8 product_major;
+};
+
+/**
+ * GPU_MODEL() - Define a GPU model. A GPU product can be uniquely identified
+ * by a combination of the major architecture version and the major product
+ * version.
+ * @_name: Name for the GPU model.
+ * @_arch_major: Architecture major.
+ * @_product_major: Product major.
+ */
+#define GPU_MODEL(_name, _arch_major, _product_major) \
+{\
+ .name = __stringify(_name), \
+ .arch_major = _arch_major, \
+ .product_major = _product_major, \
+}
+
+static const struct panthor_model gpu_models[] = {
+ GPU_MODEL(g610, 10, 7),
+ {},
+};
+
+#define GPU_INTERRUPTS_MASK \
+ (GPU_IRQ_FAULT | \
+ GPU_IRQ_PROTM_FAULT | \
+ GPU_IRQ_RESET_COMPLETED | \
+ GPU_IRQ_CLEAN_CACHES_COMPLETED)
+
+static void panthor_gpu_init_info(struct panthor_device *ptdev)
+{
+ const struct panthor_model *model;
+ u32 arch_major, product_major;
+ u32 major, minor, status;
+ unsigned int i;
+
+ ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID);
+ ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID);
+ ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID);
+ ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES);
+ ptdev->gpu_info.l2_features = gpu_read(ptdev, GPU_L2_FEATURES);
+ ptdev->gpu_info.tiler_features = gpu_read(ptdev, GPU_TILER_FEATURES);
+ ptdev->gpu_info.mem_features = gpu_read(ptdev, GPU_MEM_FEATURES);
+ ptdev->gpu_info.mmu_features = gpu_read(ptdev, GPU_MMU_FEATURES);
+ ptdev->gpu_info.thread_features = gpu_read(ptdev, GPU_THREAD_FEATURES);
+ ptdev->gpu_info.max_threads = gpu_read(ptdev, GPU_THREAD_MAX_THREADS);
+ ptdev->gpu_info.thread_max_workgroup_size = gpu_read(ptdev, GPU_THREAD_MAX_WORKGROUP_SIZE);
+ ptdev->gpu_info.thread_max_barrier_size = gpu_read(ptdev, GPU_THREAD_MAX_BARRIER_SIZE);
+ ptdev->gpu_info.coherency_features = gpu_read(ptdev, GPU_COHERENCY_FEATURES);
+ for (i = 0; i < 4; i++)
+ ptdev->gpu_info.texture_features[i] = gpu_read(ptdev, GPU_TEXTURE_FEATURES(i));
+
+ ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT);
+
+ ptdev->gpu_info.shader_present = gpu_read(ptdev, GPU_SHADER_PRESENT_LO);
+ ptdev->gpu_info.shader_present |= (u64)gpu_read(ptdev, GPU_SHADER_PRESENT_HI) << 32;
+
+ ptdev->gpu_info.tiler_present = gpu_read(ptdev, GPU_TILER_PRESENT_LO);
+ ptdev->gpu_info.tiler_present |= (u64)gpu_read(ptdev, GPU_TILER_PRESENT_HI) << 32;
+
+ ptdev->gpu_info.l2_present = gpu_read(ptdev, GPU_L2_PRESENT_LO);
+ ptdev->gpu_info.l2_present |= (u64)gpu_read(ptdev, GPU_L2_PRESENT_HI) << 32;
+
+ arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id);
+ product_major = GPU_PROD_MAJOR(ptdev->gpu_info.gpu_id);
+ major = GPU_VER_MAJOR(ptdev->gpu_info.gpu_id);
+ minor = GPU_VER_MINOR(ptdev->gpu_info.gpu_id);
+ status = GPU_VER_STATUS(ptdev->gpu_info.gpu_id);
+
+ for (model = gpu_models; model->name; model++) {
+ if (model->arch_major == arch_major &&
+ model->product_major == product_major)
+ break;
+ }
+
+ drm_info(&ptdev->base,
+ "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x",
+ model->name ?: "unknown", ptdev->gpu_info.gpu_id >> 16,
+ major, minor, status);
+
+ drm_info(&ptdev->base,
+ "Features: L2:%#x Tiler:%#x Mem:%#x MMU:%#x AS:%#x",
+ ptdev->gpu_info.l2_features,
+ ptdev->gpu_info.tiler_features,
+ ptdev->gpu_info.mem_features,
+ ptdev->gpu_info.mmu_features,
+ ptdev->gpu_info.as_present);
+
+ drm_info(&ptdev->base,
+ "shader_present=0x%0llx l2_present=0x%0llx tiler_present=0x%0llx",
+ ptdev->gpu_info.shader_present, ptdev->gpu_info.l2_present,
+ ptdev->gpu_info.tiler_present);
+}
+
+static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
+{
+ if (status & GPU_IRQ_FAULT) {
+ u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS);
+ u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) |
+ gpu_read(ptdev, GPU_FAULT_ADDR_LO);
+
+ drm_warn(&ptdev->base, "GPU Fault 0x%08x (%s) at 0x%016llx\n",
+ fault_status, panthor_exception_name(ptdev, fault_status & 0xFF),
+ address);
+ }
+ if (status & GPU_IRQ_PROTM_FAULT)
+ drm_warn(&ptdev->base, "GPU Fault in protected mode\n");
+
+ spin_lock(&ptdev->gpu->reqs_lock);
+ if (status & ptdev->gpu->pending_reqs) {
+ ptdev->gpu->pending_reqs &= ~status;
+ wake_up_all(&ptdev->gpu->reqs_acked);
+ }
+ spin_unlock(&ptdev->gpu->reqs_lock);
+}
+PANTHOR_IRQ_HANDLER(gpu, GPU, panthor_gpu_irq_handler);
+
+/**
+ * panthor_gpu_unplug() - Called when the GPU is unplugged.
+ * @ptdev: Device to unplug.
+ */
+void panthor_gpu_unplug(struct panthor_device *ptdev)
+{
+ unsigned long flags;
+
+ /* Make sure the IRQ handler is not running after that point. */
+ panthor_gpu_irq_suspend(&ptdev->gpu->irq);
+
+ /* Wake-up all waiters. */
+ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
+ ptdev->gpu->pending_reqs = 0;
+ wake_up_all(&ptdev->gpu->reqs_acked);
+ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
+}
+
+/**
+ * panthor_gpu_init() - Initialize the GPU block
+ * @ptdev: Device.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_gpu_init(struct panthor_device *ptdev)
+{
+ struct panthor_gpu *gpu;
+ u32 pa_bits;
+ int ret, irq;
+
+ gpu = drmm_kzalloc(&ptdev->base, sizeof(*gpu), GFP_KERNEL);
+ if (!gpu)
+ return -ENOMEM;
+
+ spin_lock_init(&gpu->reqs_lock);
+ init_waitqueue_head(&gpu->reqs_acked);
+ ptdev->gpu = gpu;
+ panthor_gpu_init_info(ptdev);
+
+ dma_set_max_seg_size(ptdev->base.dev, UINT_MAX);
+ pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features);
+ ret = dma_set_mask_and_coherent(ptdev->base.dev, DMA_BIT_MASK(pa_bits));
+ if (ret)
+ return ret;
+
+ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu");
+ if (irq <= 0)
+ return irq ?: -ENODEV;
+
+ ret = panthor_request_gpu_irq(ptdev, &ptdev->gpu->irq, irq, GPU_INTERRUPTS_MASK);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+/**
+ * panthor_gpu_block_power_off() - Power-off a specific block of the GPU
+ * @ptdev: Device.
+ * @blk_name: Block name.
+ * @pwroff_reg: Power-off register for this block.
+ * @pwrtrans_reg: Power transition register for this block.
+ * @mask: Sub-elements to power-off.
+ * @timeout_us: Timeout in microseconds.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_gpu_block_power_off(struct panthor_device *ptdev,
+ const char *blk_name,
+ u32 pwroff_reg, u32 pwrtrans_reg,
+ u64 mask, u32 timeout_us)
+{
+ u32 val, i;
+ int ret;
+
+ for (i = 0; i < 2; i++) {
+ u32 mask32 = mask >> (i * 32);
+
+ if (!mask32)
+ continue;
+
+ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4),
+ val, !(mask32 & val),
+ 100, timeout_us);
+ if (ret) {
+ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition",
+ blk_name, mask);
+ return ret;
+ }
+ }
+
+ if (mask & GENMASK(31, 0))
+ gpu_write(ptdev, pwroff_reg, mask);
+
+ if (mask >> 32)
+ gpu_write(ptdev, pwroff_reg + 4, mask >> 32);
+
+ for (i = 0; i < 2; i++) {
+ u32 mask32 = mask >> (i * 32);
+
+ if (!mask32)
+ continue;
+
+ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4),
+ val, !(mask32 & val),
+ 100, timeout_us);
+ if (ret) {
+ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition",
+ blk_name, mask);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * panthor_gpu_block_power_on() - Power-on a specific block of the GPU
+ * @ptdev: Device.
+ * @blk_name: Block name.
+ * @pwron_reg: Power-on register for this block.
+ * @pwrtrans_reg: Power transition register for this block.
+ * @rdy_reg: Power transition ready register.
+ * @mask: Sub-elements to power-on.
+ * @timeout_us: Timeout in microseconds.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_gpu_block_power_on(struct panthor_device *ptdev,
+ const char *blk_name,
+ u32 pwron_reg, u32 pwrtrans_reg,
+ u32 rdy_reg, u64 mask, u32 timeout_us)
+{
+ u32 val, i;
+ int ret;
+
+ for (i = 0; i < 2; i++) {
+ u32 mask32 = mask >> (i * 32);
+
+ if (!mask32)
+ continue;
+
+ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4),
+ val, !(mask32 & val),
+ 100, timeout_us);
+ if (ret) {
+ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition",
+ blk_name, mask);
+ return ret;
+ }
+ }
+
+ if (mask & GENMASK(31, 0))
+ gpu_write(ptdev, pwron_reg, mask);
+
+ if (mask >> 32)
+ gpu_write(ptdev, pwron_reg + 4, mask >> 32);
+
+ for (i = 0; i < 2; i++) {
+ u32 mask32 = mask >> (i * 32);
+
+ if (!mask32)
+ continue;
+
+ ret = readl_relaxed_poll_timeout(ptdev->iomem + rdy_reg + (i * 4),
+ val, (mask32 & val) == mask32,
+ 100, timeout_us);
+ if (ret) {
+ drm_err(&ptdev->base, "timeout waiting on %s:%llx readiness",
+ blk_name, mask);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * panthor_gpu_l2_power_on() - Power-on the L2-cache
+ * @ptdev: Device.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_gpu_l2_power_on(struct panthor_device *ptdev)
+{
+ if (ptdev->gpu_info.l2_present != 1) {
+ /*
+ * Only support one core group now.
+ * ~(l2_present - 1) unsets all bits in l2_present except
+ * the bottom bit. (l2_present - 2) has all the bits in
+ * the first core group set. AND them together to generate
+ * a mask of cores in the first core group.
+ */
+ u64 core_mask = ~(ptdev->gpu_info.l2_present - 1) &
+ (ptdev->gpu_info.l2_present - 2);
+ drm_info_once(&ptdev->base, "using only 1st core group (%lu cores from %lu)\n",
+ hweight64(core_mask),
+ hweight64(ptdev->gpu_info.shader_present));
+ }
+
+ return panthor_gpu_power_on(ptdev, L2, 1, 20000);
+}
+
+/**
+ * panthor_gpu_flush_caches() - Flush caches
+ * @ptdev: Device.
+ * @l2: L2 flush type.
+ * @lsc: LSC flush type.
+ * @other: Other flush type.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_gpu_flush_caches(struct panthor_device *ptdev,
+ u32 l2, u32 lsc, u32 other)
+{
+ bool timedout = false;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
+ if (!drm_WARN_ON(&ptdev->base,
+ ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) {
+ ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED;
+ gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other));
+ }
+ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
+
+ if (!wait_event_timeout(ptdev->gpu->reqs_acked,
+ !(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED),
+ msecs_to_jiffies(100))) {
+ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
+ if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 &&
+ !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_CLEAN_CACHES_COMPLETED))
+ timedout = true;
+ else
+ ptdev->gpu->pending_reqs &= ~GPU_IRQ_CLEAN_CACHES_COMPLETED;
+ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
+ }
+
+ if (timedout) {
+ drm_err(&ptdev->base, "Flush caches timeout");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+/**
+ * panthor_gpu_soft_reset() - Issue a soft-reset
+ * @ptdev: Device.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_gpu_soft_reset(struct panthor_device *ptdev)
+{
+ bool timedout = false;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
+ if (!drm_WARN_ON(&ptdev->base,
+ ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED)) {
+ ptdev->gpu->pending_reqs |= GPU_IRQ_RESET_COMPLETED;
+ gpu_write(ptdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED);
+ gpu_write(ptdev, GPU_CMD, GPU_SOFT_RESET);
+ }
+ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
+
+ if (!wait_event_timeout(ptdev->gpu->reqs_acked,
+ !(ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED),
+ msecs_to_jiffies(100))) {
+ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
+ if ((ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED) != 0 &&
+ !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_RESET_COMPLETED))
+ timedout = true;
+ else
+ ptdev->gpu->pending_reqs &= ~GPU_IRQ_RESET_COMPLETED;
+ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
+ }
+
+ if (timedout) {
+ drm_err(&ptdev->base, "Soft reset timeout");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+/**
+ * panthor_gpu_suspend() - Suspend the GPU block.
+ * @ptdev: Device.
+ *
+ * Suspend the GPU irq. This should be called last in the suspend procedure,
+ * after all other blocks have been suspended.
+ */
+void panthor_gpu_suspend(struct panthor_device *ptdev)
+{
+ /*
+ * It may be preferable to simply power down the L2, but for now just
+ * soft-reset which will leave the L2 powered down.
+ */
+ panthor_gpu_soft_reset(ptdev);
+ panthor_gpu_irq_suspend(&ptdev->gpu->irq);
+}
+
+/**
+ * panthor_gpu_resume() - Resume the GPU block.
+ * @ptdev: Device.
+ *
+ * Resume the IRQ handler and power-on the L2-cache.
+ * The FW takes care of powering the other blocks.
+ */
+void panthor_gpu_resume(struct panthor_device *ptdev)
+{
+ panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK);
+ panthor_gpu_l2_power_on(ptdev);
+}
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_gpu.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Collabora ltd. */
+
+#ifndef __PANTHOR_GPU_H__
+#define __PANTHOR_GPU_H__
+
+struct panthor_device;
+
+int panthor_gpu_init(struct panthor_device *ptdev);
+void panthor_gpu_unplug(struct panthor_device *ptdev);
+void panthor_gpu_suspend(struct panthor_device *ptdev);
+void panthor_gpu_resume(struct panthor_device *ptdev);
+
+int panthor_gpu_block_power_on(struct panthor_device *ptdev,
+ const char *blk_name,
+ u32 pwron_reg, u32 pwrtrans_reg,
+ u32 rdy_reg, u64 mask, u32 timeout_us);
+int panthor_gpu_block_power_off(struct panthor_device *ptdev,
+ const char *blk_name,
+ u32 pwroff_reg, u32 pwrtrans_reg,
+ u64 mask, u32 timeout_us);
+
+/**
+ * panthor_gpu_power_on() - Power on the GPU block.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+#define panthor_gpu_power_on(ptdev, type, mask, timeout_us) \
+ panthor_gpu_block_power_on(ptdev, #type, \
+ type ## _PWRON_LO, \
+ type ## _PWRTRANS_LO, \
+ type ## _READY_LO, \
+ mask, timeout_us)
+
+/**
+ * panthor_gpu_power_off() - Power off the GPU block.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+#define panthor_gpu_power_off(ptdev, type, mask, timeout_us) \
+ panthor_gpu_block_power_off(ptdev, #type, \
+ type ## _PWROFF_LO, \
+ type ## _PWRTRANS_LO, \
+ mask, timeout_us)
+
+int panthor_gpu_l2_power_on(struct panthor_device *ptdev);
+int panthor_gpu_flush_caches(struct panthor_device *ptdev,
+ u32 l2, u32 lsc, u32 other);
+int panthor_gpu_soft_reset(struct panthor_device *ptdev);
+
+#endif
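For reference, the token-pasting helpers above mean that the L2 power-on
call used earlier in this patch, panthor_gpu_power_on(ptdev, L2, 1, 20000),
expands to:

panthor_gpu_block_power_on(ptdev, "L2",
			   L2_PWRON_LO, L2_PWRTRANS_LO, L2_READY_LO,
			   1, 20000);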

View File

@ -1,426 +0,0 @@
From 8a1cc07578bf42d85f008316873d710ff684dd29 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:19 +0100
Subject: [PATCH] drm/panthor: Add GEM logical block
Anything relating to GEM object management is placed here. Nothing
particularly interesting here, given the implementation is based on
drm_gem_shmem_object, which is doing most of the work.
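A sketch of the kernel-BO lifecycle this patch introduces (the vm
pointer, size and flag values below are placeholders):

struct panthor_kernel_bo *bo;

bo = panthor_kernel_bo_create(ptdev, vm, SZ_4K,
			      DRM_PANTHOR_BO_NO_MMAP, /* bo_flags */
			      0,                      /* vm_map_flags */
			      PANTHOR_VM_KERNEL_AUTO_VA);
if (IS_ERR(bo))
	return PTR_ERR(bo);

/* ... use the GPU-mapped buffer ... */

panthor_kernel_bo_destroy(vm, bo); /* safe on NULL/ERR_PTR too */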
v6:
- Add Maxime's and Heiko's acks
- Return a page-aligned BO size to userspace when creating a BO
- Keep header inclusion alphabetically ordered
v5:
- Add Liviu's and Steve's R-b
v4:
- Force kernel BOs to be GPU mapped
- Make panthor_kernel_bo_destroy() robust against ERR/NULL BO pointers
to simplify the call sites
v3:
- Add acks for the MIT/GPL2 relicensing
- Provide a panthor_kernel_bo abstraction for buffer objects managed by
the kernel (will replace panthor_fw_mem and be used everywhere we were
using panthor_gem_create_and_map() before)
- Adjust things to match drm_gpuvm changes
- Change return of panthor_gem_create_with_handle() to int
Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-6-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_gem.c | 230 ++++++++++++++++++++++++++
drivers/gpu/drm/panthor/panthor_gem.h | 142 ++++++++++++++++
2 files changed, 372 insertions(+)
create mode 100644 drivers/gpu/drm/panthor/panthor_gem.c
create mode 100644 drivers/gpu/drm/panthor/panthor_gem.h
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2023 Collabora ltd. */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <drm/panthor_drm.h>
+
+#include "panthor_device.h"
+#include "panthor_gem.h"
+#include "panthor_mmu.h"
+
+static void panthor_gem_free_object(struct drm_gem_object *obj)
+{
+ struct panthor_gem_object *bo = to_panthor_bo(obj);
+ struct drm_gem_object *vm_root_gem = bo->exclusive_vm_root_gem;
+
+ drm_gem_free_mmap_offset(&bo->base.base);
+ mutex_destroy(&bo->gpuva_list_lock);
+ drm_gem_shmem_free(&bo->base);
+ drm_gem_object_put(vm_root_gem);
+}
+
+/**
+ * panthor_kernel_bo_destroy() - Destroy a kernel buffer object
+ * @vm: The VM this BO was mapped to.
+ * @bo: Kernel buffer object to destroy. If NULL or an ERR_PTR(), the destruction
+ * is skipped.
+ */
+void panthor_kernel_bo_destroy(struct panthor_vm *vm,
+ struct panthor_kernel_bo *bo)
+{
+ int ret;
+
+ if (IS_ERR_OR_NULL(bo))
+ return;
+
+ panthor_kernel_bo_vunmap(bo);
+
+ if (drm_WARN_ON(bo->obj->dev,
+ to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm)))
+ goto out_free_bo;
+
+ ret = panthor_vm_unmap_range(vm, bo->va_node.start,
+ panthor_kernel_bo_size(bo));
+ if (ret)
+ goto out_free_bo;
+
+ panthor_vm_free_va(vm, &bo->va_node);
+ drm_gem_object_put(bo->obj);
+
+out_free_bo:
+ kfree(bo);
+}
+
+/**
+ * panthor_kernel_bo_create() - Create and map a GEM object to a VM
+ * @ptdev: Device.
+ * @vm: VM to map the GEM to. If NULL, the kernel object is not GPU mapped.
+ * @size: Size of the buffer object.
+ * @bo_flags: Combination of drm_panthor_bo_flags flags.
+ * @vm_map_flags: Combination of drm_panthor_vm_bind_op_flags (only those
+ * that are related to map operations).
+ * @gpu_va: GPU address assigned when mapping to the VM.
+ * If gpu_va == PANTHOR_VM_KERNEL_AUTO_VA, the virtual address will be
+ * automatically allocated.
+ *
+ * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
+ */
+struct panthor_kernel_bo *
+panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
+ size_t size, u32 bo_flags, u32 vm_map_flags,
+ u64 gpu_va)
+{
+ struct drm_gem_shmem_object *obj;
+ struct panthor_kernel_bo *kbo;
+ struct panthor_gem_object *bo;
+ int ret;
+
+ if (drm_WARN_ON(&ptdev->base, !vm))
+ return ERR_PTR(-EINVAL);
+
+ kbo = kzalloc(sizeof(*kbo), GFP_KERNEL);
+ if (!kbo)
+ return ERR_PTR(-ENOMEM);
+
+ obj = drm_gem_shmem_create(&ptdev->base, size);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
+ goto err_free_bo;
+ }
+
+ bo = to_panthor_bo(&obj->base);
+ size = obj->base.size;
+ kbo->obj = &obj->base;
+ bo->flags = bo_flags;
+
+ ret = panthor_vm_alloc_va(vm, gpu_va, size, &kbo->va_node);
+ if (ret)
+ goto err_put_obj;
+
+ ret = panthor_vm_map_bo_range(vm, bo, 0, size, kbo->va_node.start, vm_map_flags);
+ if (ret)
+ goto err_free_va;
+
+ bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm);
+ drm_gem_object_get(bo->exclusive_vm_root_gem);
+ bo->base.base.resv = bo->exclusive_vm_root_gem->resv;
+ return kbo;
+
+err_free_va:
+ panthor_vm_free_va(vm, &kbo->va_node);
+
+err_put_obj:
+ drm_gem_object_put(&obj->base);
+
+err_free_bo:
+ kfree(kbo);
+ return ERR_PTR(ret);
+}
+
+static int panthor_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ struct panthor_gem_object *bo = to_panthor_bo(obj);
+
+ /* Don't allow mmap on objects that have the NO_MMAP flag set. */
+ if (bo->flags & DRM_PANTHOR_BO_NO_MMAP)
+ return -EINVAL;
+
+ return drm_gem_shmem_object_mmap(obj, vma);
+}
+
+static struct dma_buf *
+panthor_gem_prime_export(struct drm_gem_object *obj, int flags)
+{
+ /* We can't export GEMs that have an exclusive VM. */
+ if (to_panthor_bo(obj)->exclusive_vm_root_gem)
+ return ERR_PTR(-EINVAL);
+
+ return drm_gem_prime_export(obj, flags);
+}
+
+static const struct drm_gem_object_funcs panthor_gem_funcs = {
+ .free = panthor_gem_free_object,
+ .print_info = drm_gem_shmem_object_print_info,
+ .pin = drm_gem_shmem_object_pin,
+ .unpin = drm_gem_shmem_object_unpin,
+ .get_sg_table = drm_gem_shmem_object_get_sg_table,
+ .vmap = drm_gem_shmem_object_vmap,
+ .vunmap = drm_gem_shmem_object_vunmap,
+ .mmap = panthor_gem_mmap,
+ .export = panthor_gem_prime_export,
+ .vm_ops = &drm_gem_shmem_vm_ops,
+};
+
+/**
+ * panthor_gem_create_object - Implementation of driver->gem_create_object.
+ * @ddev: DRM device
+ * @size: Size in bytes of the memory the object will reference
+ *
+ * This lets the GEM helpers allocate object structs for us, and keep
+ * our BO stats correct.
+ */
+struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size)
+{
+ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
+ struct panthor_gem_object *obj;
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ obj->base.base.funcs = &panthor_gem_funcs;
+ obj->base.map_wc = !ptdev->coherent;
+ mutex_init(&obj->gpuva_list_lock);
+ drm_gem_gpuva_set_lock(&obj->base.base, &obj->gpuva_list_lock);
+
+ return &obj->base.base;
+}
+
+/**
+ * panthor_gem_create_with_handle() - Create a GEM object and attach it to a handle.
+ * @file: DRM file.
+ * @ddev: DRM device.
+ * @exclusive_vm: Exclusive VM. Not NULL if the GEM object can't be shared.
+ * @size: Size of the GEM object to allocate.
+ * @flags: Combination of drm_panthor_bo_flags flags.
+ * @handle: Pointer holding the handle pointing to the new GEM object.
+ *
+ * Return: Zero on success
+ */
+int
+panthor_gem_create_with_handle(struct drm_file *file,
+ struct drm_device *ddev,
+ struct panthor_vm *exclusive_vm,
+ u64 *size, u32 flags, u32 *handle)
+{
+ int ret;
+ struct drm_gem_shmem_object *shmem;
+ struct panthor_gem_object *bo;
+
+ shmem = drm_gem_shmem_create(ddev, *size);
+ if (IS_ERR(shmem))
+ return PTR_ERR(shmem);
+
+ bo = to_panthor_bo(&shmem->base);
+ bo->flags = flags;
+
+ if (exclusive_vm) {
+ bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm);
+ drm_gem_object_get(bo->exclusive_vm_root_gem);
+ bo->base.base.resv = bo->exclusive_vm_root_gem->resv;
+ }
+
+ /*
+	 * Allocate an id in the idr table where the obj is registered,
+	 * and return it through @handle so userspace can refer to the object.
+ */
+ ret = drm_gem_handle_create(file, &shmem->base, handle);
+ if (!ret)
+ *size = bo->base.base.size;
+
+ /* drop reference from allocate - handle holds it now. */
+ drm_gem_object_put(&shmem->base);
+
+ return ret;
+}
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_gem.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2023 Collabora ltd. */
+
+#ifndef __PANTHOR_GEM_H__
+#define __PANTHOR_GEM_H__
+
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_mm.h>
+
+#include <linux/iosys-map.h>
+#include <linux/rwsem.h>
+
+struct panthor_vm;
+
+/**
+ * struct panthor_gem_object - Driver specific GEM object.
+ */
+struct panthor_gem_object {
+ /** @base: Inherit from drm_gem_shmem_object. */
+ struct drm_gem_shmem_object base;
+
+ /**
+ * @exclusive_vm_root_gem: Root GEM of the exclusive VM this GEM object
+ * is attached to.
+ *
+ * If @exclusive_vm_root_gem != NULL, any attempt to bind the GEM to a
+ * different VM will fail.
+ *
+ * All FW memory objects have this field set to the root GEM of the MCU
+ * VM.
+ */
+ struct drm_gem_object *exclusive_vm_root_gem;
+
+ /**
+ * @gpuva_list_lock: Custom GPUVA lock.
+ *
+ * Used to protect insertion of drm_gpuva elements to the
+ * drm_gem_object.gpuva.list list.
+ *
+ * We can't use the GEM resv for that, because drm_gpuva_link() is
+ * called in a dma-signaling path, where we're not allowed to take
+ * resv locks.
+ */
+ struct mutex gpuva_list_lock;
+
+ /** @flags: Combination of drm_panthor_bo_flags flags. */
+ u32 flags;
+};
+
+/**
+ * struct panthor_kernel_bo - Kernel buffer object.
+ *
+ * These objects are only manipulated by the kernel driver and not
+ * directly exposed to the userspace. The GPU address of a kernel
+ * BO might be passed to userspace though.
+ */
+struct panthor_kernel_bo {
+ /**
+ * @obj: The GEM object backing this kernel buffer object.
+ */
+ struct drm_gem_object *obj;
+
+ /**
+ * @va_node: VA space allocated to this GEM.
+ */
+ struct drm_mm_node va_node;
+
+ /**
+ * @kmap: Kernel CPU mapping of @gem.
+ */
+ void *kmap;
+};
+
+static inline
+struct panthor_gem_object *to_panthor_bo(struct drm_gem_object *obj)
+{
+ return container_of(to_drm_gem_shmem_obj(obj), struct panthor_gem_object, base);
+}
+
+struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size);
+
+struct drm_gem_object *
+panthor_gem_prime_import_sg_table(struct drm_device *ddev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sgt);
+
+int
+panthor_gem_create_with_handle(struct drm_file *file,
+ struct drm_device *ddev,
+ struct panthor_vm *exclusive_vm,
+ u64 *size, u32 flags, uint32_t *handle);
+
+static inline u64
+panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo)
+{
+ return bo->va_node.start;
+}
+
+static inline size_t
+panthor_kernel_bo_size(struct panthor_kernel_bo *bo)
+{
+ return bo->obj->size;
+}
+
+static inline int
+panthor_kernel_bo_vmap(struct panthor_kernel_bo *bo)
+{
+ struct iosys_map map;
+ int ret;
+
+ if (bo->kmap)
+ return 0;
+
+ ret = drm_gem_vmap_unlocked(bo->obj, &map);
+ if (ret)
+ return ret;
+
+ bo->kmap = map.vaddr;
+ return 0;
+}
+
+static inline void
+panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo)
+{
+ if (bo->kmap) {
+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->kmap);
+
+ drm_gem_vunmap_unlocked(bo->obj, &map);
+ bo->kmap = NULL;
+ }
+}
+
+struct panthor_kernel_bo *
+panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
+ size_t size, u32 bo_flags, u32 vm_map_flags,
+ u64 gpu_va);
+
+void panthor_kernel_bo_destroy(struct panthor_vm *vm,
+ struct panthor_kernel_bo *bo);
+
+#endif /* __PANTHOR_GEM_H__ */

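As a usage sketch (not part of the patch; the flag combination mirrors what the heap code later in this series passes), allocating a kernel-only, GPU-mapped buffer looks roughly like:

struct panthor_kernel_bo *bo;
int ret;

bo = panthor_kernel_bo_create(ptdev, vm, SZ_64K,
			      DRM_PANTHOR_BO_NO_MMAP,
			      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
			      PANTHOR_VM_KERNEL_AUTO_VA);
if (IS_ERR(bo))
	return PTR_ERR(bo);

/* The GPU VA is assigned at creation; CPU access needs an explicit vmap. */
ret = panthor_kernel_bo_vmap(bo);
if (ret) {
	panthor_kernel_bo_destroy(vm, bo);
	return ret;
}

/* ... use bo->kmap and panthor_kernel_bo_gpuva(bo) ... */

panthor_kernel_bo_destroy(vm, bo);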
View File

@ -1,356 +0,0 @@
From fac9b22df4b1108f7fa5a087a77f922489861484 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:20 +0100
Subject: [PATCH] drm/panthor: Add the devfreq logical block
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Everything related to devfreq is placed in panthor_devfreq.c, and
helpers that can be called by other logical blocks are exposed through
panthor_devfreq.h.
This implementation is loosely based on the panfrost implementation,
the only difference being that we don't count device users, because
the idle/active state will be managed by the scheduler logic.
v6:
- Add Maxime's and Heiko's acks
- Keep header inclusion alphabetically ordered
v4:
- Add Clément's A-b for the relicensing
v3:
- Add acks for the MIT/GPL2 relicensing
v2:
- Added in v2
Cc: Clément Péron <peron.clem@gmail.com> # MIT+GPL2 relicensing
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Acked-by: Clément Péron <peron.clem@gmail.com> # MIT+GPL2 relicensing
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-7-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_devfreq.c | 283 ++++++++++++++++++++++
drivers/gpu/drm/panthor/panthor_devfreq.h | 21 ++
2 files changed, 304 insertions(+)
create mode 100644 drivers/gpu/drm/panthor/panthor_devfreq.c
create mode 100644 drivers/gpu/drm/panthor/panthor_devfreq.h
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_devfreq.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2019 Collabora ltd. */
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+
+#include <drm/drm_managed.h>
+
+#include "panthor_devfreq.h"
+#include "panthor_device.h"
+
+/**
+ * struct panthor_devfreq - Device frequency management
+ */
+struct panthor_devfreq {
+ /** @devfreq: devfreq device. */
+ struct devfreq *devfreq;
+
+ /** @gov_data: Governor data. */
+ struct devfreq_simple_ondemand_data gov_data;
+
+ /** @busy_time: Busy time. */
+ ktime_t busy_time;
+
+ /** @idle_time: Idle time. */
+ ktime_t idle_time;
+
+ /** @time_last_update: Last update time. */
+ ktime_t time_last_update;
+
+ /** @last_busy_state: True if the GPU was busy last time we updated the state. */
+ bool last_busy_state;
+
+ /*
+ * @lock: Lock used to protect busy_time, idle_time, time_last_update and
+ * last_busy_state.
+ *
+ * These fields can be accessed concurrently by panthor_devfreq_get_dev_status()
+ * and panthor_devfreq_record_{busy,idle}().
+ */
+ spinlock_t lock;
+};
+
+static void panthor_devfreq_update_utilization(struct panthor_devfreq *pdevfreq)
+{
+ ktime_t now, last;
+
+ now = ktime_get();
+ last = pdevfreq->time_last_update;
+
+ if (pdevfreq->last_busy_state)
+ pdevfreq->busy_time += ktime_sub(now, last);
+ else
+ pdevfreq->idle_time += ktime_sub(now, last);
+
+ pdevfreq->time_last_update = now;
+}
+
+static int panthor_devfreq_target(struct device *dev, unsigned long *freq,
+ u32 flags)
+{
+ struct dev_pm_opp *opp;
+
+ opp = devfreq_recommended_opp(dev, freq, flags);
+ if (IS_ERR(opp))
+ return PTR_ERR(opp);
+ dev_pm_opp_put(opp);
+
+ return dev_pm_opp_set_rate(dev, *freq);
+}
+
+static void panthor_devfreq_reset(struct panthor_devfreq *pdevfreq)
+{
+ pdevfreq->busy_time = 0;
+ pdevfreq->idle_time = 0;
+ pdevfreq->time_last_update = ktime_get();
+}
+
+static int panthor_devfreq_get_dev_status(struct device *dev,
+ struct devfreq_dev_status *status)
+{
+ struct panthor_device *ptdev = dev_get_drvdata(dev);
+ struct panthor_devfreq *pdevfreq = ptdev->devfreq;
+ unsigned long irqflags;
+
+ status->current_frequency = clk_get_rate(ptdev->clks.core);
+
+ spin_lock_irqsave(&pdevfreq->lock, irqflags);
+
+ panthor_devfreq_update_utilization(pdevfreq);
+
+ status->total_time = ktime_to_ns(ktime_add(pdevfreq->busy_time,
+ pdevfreq->idle_time));
+
+ status->busy_time = ktime_to_ns(pdevfreq->busy_time);
+
+ panthor_devfreq_reset(pdevfreq);
+
+ spin_unlock_irqrestore(&pdevfreq->lock, irqflags);
+
+ drm_dbg(&ptdev->base, "busy %lu total %lu %lu %% freq %lu MHz\n",
+ status->busy_time, status->total_time,
+ status->busy_time / (status->total_time / 100),
+ status->current_frequency / 1000 / 1000);
+
+ return 0;
+}
+
+static struct devfreq_dev_profile panthor_devfreq_profile = {
+ .timer = DEVFREQ_TIMER_DELAYED,
+ .polling_ms = 50, /* ~3 frames */
+ .target = panthor_devfreq_target,
+ .get_dev_status = panthor_devfreq_get_dev_status,
+};
+
+int panthor_devfreq_init(struct panthor_device *ptdev)
+{
+ /* There's actually 2 regulators (mali and sram), but the OPP core only
+ * supports one.
+ *
+ * We assume the sram regulator is coupled with the mali one and let
+ * the coupling logic deal with voltage updates.
+ */
+ static const char * const reg_names[] = { "mali", NULL };
+ struct thermal_cooling_device *cooling;
+ struct device *dev = ptdev->base.dev;
+ struct panthor_devfreq *pdevfreq;
+ struct dev_pm_opp *opp;
+ unsigned long cur_freq;
+ int ret;
+
+ pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL);
+ if (!pdevfreq)
+ return -ENOMEM;
+
+ ptdev->devfreq = pdevfreq;
+
+ ret = devm_pm_opp_set_regulators(dev, reg_names);
+ if (ret) {
+ if (ret != -EPROBE_DEFER)
+ DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n");
+
+ return ret;
+ }
+
+ ret = devm_pm_opp_of_add_table(dev);
+ if (ret)
+ return ret;
+
+ spin_lock_init(&pdevfreq->lock);
+
+ panthor_devfreq_reset(pdevfreq);
+
+ cur_freq = clk_get_rate(ptdev->clks.core);
+
+ opp = devfreq_recommended_opp(dev, &cur_freq, 0);
+ if (IS_ERR(opp))
+ return PTR_ERR(opp);
+
+ panthor_devfreq_profile.initial_freq = cur_freq;
+
+ /* Regulator coupling only takes care of synchronizing/balancing voltage
+ * updates, but the coupled regulator needs to be enabled manually.
+ *
+ * We use devm_regulator_get_enable_optional() and keep the sram supply
+ * enabled until the device is removed, just like we do for the mali
+ * supply, which is enabled when dev_pm_opp_set_opp(dev, opp) is called,
+ * and disabled when the opp_table is torn down, using the devm action.
+ *
+ * If we really care about disabling regulators on suspend, we should:
+ * - use devm_regulator_get_optional() here
+ * - call dev_pm_opp_set_opp(dev, NULL) before leaving this function
+ * (this disables the regulator passed to the OPP layer)
+ * - call dev_pm_opp_set_opp(dev, NULL) and
+ * regulator_disable(ptdev->regulators.sram) in
+ * panthor_devfreq_suspend()
+ * - call dev_pm_opp_set_opp(dev, default_opp) and
+ * regulator_enable(ptdev->regulators.sram) in
+ * panthor_devfreq_resume()
+ *
+	 * But without knowing if it's beneficial or not (in terms of power
+ * consumption), or how much it slows down the suspend/resume steps,
+ * let's just keep regulators enabled for the device lifetime.
+ */
+ ret = devm_regulator_get_enable_optional(dev, "sram");
+ if (ret && ret != -ENODEV) {
+ if (ret != -EPROBE_DEFER)
+ DRM_DEV_ERROR(dev, "Couldn't retrieve/enable sram supply\n");
+ return ret;
+ }
+
+ /*
+	 * Set the recommended OPP: this will enable and configure the regulator
+	 * if any, and avoid a switch off by regulator_late_cleanup()
+ */
+ ret = dev_pm_opp_set_opp(dev, opp);
+ if (ret) {
+ DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n");
+ return ret;
+ }
+
+ dev_pm_opp_put(opp);
+
+ /*
+ * Setup default thresholds for the simple_ondemand governor.
+ * The values are chosen based on experiments.
+ */
+ pdevfreq->gov_data.upthreshold = 45;
+ pdevfreq->gov_data.downdifferential = 5;
+
+ pdevfreq->devfreq = devm_devfreq_add_device(dev, &panthor_devfreq_profile,
+ DEVFREQ_GOV_SIMPLE_ONDEMAND,
+ &pdevfreq->gov_data);
+ if (IS_ERR(pdevfreq->devfreq)) {
+ DRM_DEV_ERROR(dev, "Couldn't initialize GPU devfreq\n");
+ ret = PTR_ERR(pdevfreq->devfreq);
+ pdevfreq->devfreq = NULL;
+ return ret;
+ }
+
+ cooling = devfreq_cooling_em_register(pdevfreq->devfreq, NULL);
+ if (IS_ERR(cooling))
+ DRM_DEV_INFO(dev, "Failed to register cooling device\n");
+
+ return 0;
+}
+
+int panthor_devfreq_resume(struct panthor_device *ptdev)
+{
+ struct panthor_devfreq *pdevfreq = ptdev->devfreq;
+
+ if (!pdevfreq->devfreq)
+ return 0;
+
+ panthor_devfreq_reset(pdevfreq);
+
+ return devfreq_resume_device(pdevfreq->devfreq);
+}
+
+int panthor_devfreq_suspend(struct panthor_device *ptdev)
+{
+ struct panthor_devfreq *pdevfreq = ptdev->devfreq;
+
+ if (!pdevfreq->devfreq)
+ return 0;
+
+ return devfreq_suspend_device(pdevfreq->devfreq);
+}
+
+void panthor_devfreq_record_busy(struct panthor_device *ptdev)
+{
+ struct panthor_devfreq *pdevfreq = ptdev->devfreq;
+ unsigned long irqflags;
+
+ if (!pdevfreq->devfreq)
+ return;
+
+ spin_lock_irqsave(&pdevfreq->lock, irqflags);
+
+ panthor_devfreq_update_utilization(pdevfreq);
+ pdevfreq->last_busy_state = true;
+
+ spin_unlock_irqrestore(&pdevfreq->lock, irqflags);
+}
+
+void panthor_devfreq_record_idle(struct panthor_device *ptdev)
+{
+ struct panthor_devfreq *pdevfreq = ptdev->devfreq;
+ unsigned long irqflags;
+
+ if (!pdevfreq->devfreq)
+ return;
+
+ spin_lock_irqsave(&pdevfreq->lock, irqflags);
+
+ panthor_devfreq_update_utilization(pdevfreq);
+ pdevfreq->last_busy_state = false;
+
+ spin_unlock_irqrestore(&pdevfreq->lock, irqflags);
+}
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_devfreq.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2019 Collabora ltd. */
+
+#ifndef __PANTHOR_DEVFREQ_H__
+#define __PANTHOR_DEVFREQ_H__
+
+struct devfreq;
+struct thermal_cooling_device;
+
+struct panthor_device;
+struct panthor_devfreq;
+
+int panthor_devfreq_init(struct panthor_device *ptdev);
+
+int panthor_devfreq_resume(struct panthor_device *ptdev);
+int panthor_devfreq_suspend(struct panthor_device *ptdev);
+
+void panthor_devfreq_record_busy(struct panthor_device *ptdev);
+void panthor_devfreq_record_idle(struct panthor_device *ptdev);
+
+#endif /* __PANTHOR_DEVFREQ_H__ */

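The busy/idle accounting above is meant to be driven by the scheduler; hypothetical call sites would look like this (a sketch, not part of this patch):

/* First job becomes runnable: start accumulating busy time. */
panthor_devfreq_record_busy(ptdev);

/*
 * devfreq polls panthor_devfreq_get_dev_status() every 50 ms and picks
 * an OPP based on busy_time / (busy_time + idle_time).
 */

/* Last job retires: switch back to idle accounting. */
panthor_devfreq_record_idle(ptdev);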
View File

@ -1,696 +0,0 @@
From 9cca48fa4f8933a2dadf2f011d461329ca0a8337 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:23 +0100
Subject: [PATCH] drm/panthor: Add the heap logical block

Tiler heap growing requires some kernel driver involvement: when the
tiler runs out of heap memory, it will raise an exception which is
either directly handled by the firmware if some free heap chunks are
available in the heap context, or passed back to the kernel otherwise.
The heap helpers will be used by the scheduler logic to allocate more
heap chunks to a heap context, when such a situation happens.
Heap context creation is explicitly requested by userspace (using
the TILER_HEAP_CREATE ioctl), and the returned context is attached to a
queue through some command stream instruction.
All the kernel does is keep the list of heap chunks allocated to a
context, so they can be freed when TILER_HEAP_DESTROY is called, or
extended when the FW requests a new chunk.
v6:
- Add Maxime's and Heiko's acks
v5:
- Fix FIXME comment
- Add Steve's R-b
v4:
- Rework locking to allow concurrent calls to panthor_heap_grow()
- Add a helper to return a heap chunk if we couldn't pass it to the
FW because the group was scheduled out
v3:
- Add a FIXME for the heap OOM deadlock
- Use the panthor_kernel_bo abstraction for the heap context and heap
chunks
- Drop the panthor_heap_gpu_ctx struct as it is opaque to the driver
- Ensure that the heap context is aligned to the GPU cache line size
- Minor code tidy ups
Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-10-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_heap.c | 597 +++++++++++++++++++++++++
drivers/gpu/drm/panthor/panthor_heap.h | 39 ++
2 files changed, 636 insertions(+)
create mode 100644 drivers/gpu/drm/panthor/panthor_heap.c
create mode 100644 drivers/gpu/drm/panthor/panthor_heap.h
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -0,0 +1,597 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2023 Collabora ltd. */
+
+#include <linux/iosys-map.h>
+#include <linux/rwsem.h>
+
+#include <drm/panthor_drm.h>
+
+#include "panthor_device.h"
+#include "panthor_gem.h"
+#include "panthor_heap.h"
+#include "panthor_mmu.h"
+#include "panthor_regs.h"
+
+/*
+ * The GPU heap context is an opaque structure used by the GPU to track the
+ * heap allocations. The driver should only touch it to initialize it (zero all
+ * fields). Because the CPU and GPU can both access this structure it is
+ * required to be GPU cache line aligned.
+ */
+#define HEAP_CONTEXT_SIZE 32
+
+/**
+ * struct panthor_heap_chunk_header - Heap chunk header
+ */
+struct panthor_heap_chunk_header {
+ /**
+ * @next: Next heap chunk in the list.
+ *
+ * This is a GPU VA.
+ */
+ u64 next;
+
+ /** @unknown: MBZ. */
+ u32 unknown[14];
+};
+
+/**
+ * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
+ */
+struct panthor_heap_chunk {
+ /** @node: Used to insert the heap chunk in panthor_heap::chunks. */
+ struct list_head node;
+
+ /** @bo: Buffer object backing the heap chunk. */
+ struct panthor_kernel_bo *bo;
+};
+
+/**
+ * struct panthor_heap - Structure used to manage tiler heap contexts.
+ */
+struct panthor_heap {
+ /** @chunks: List containing all heap chunks allocated so far. */
+ struct list_head chunks;
+
+ /** @lock: Lock protecting insertion in the chunks list. */
+ struct mutex lock;
+
+ /** @chunk_size: Size of each chunk. */
+ u32 chunk_size;
+
+ /** @max_chunks: Maximum number of chunks. */
+ u32 max_chunks;
+
+ /**
+ * @target_in_flight: Number of in-flight render passes after which
+ * we'd let the FW wait for fragment job to finish instead of allocating new chunks.
+ */
+ u32 target_in_flight;
+
+ /** @chunk_count: Number of heap chunks currently allocated. */
+ u32 chunk_count;
+};
+
+#define MAX_HEAPS_PER_POOL 128
+
+/**
+ * struct panthor_heap_pool - Pool of heap contexts
+ *
+ * The pool is attached to a panthor_file and can't be shared across processes.
+ */
+struct panthor_heap_pool {
+ /** @refcount: Reference count. */
+ struct kref refcount;
+
+ /** @ptdev: Device. */
+ struct panthor_device *ptdev;
+
+ /** @vm: VM this pool is bound to. */
+ struct panthor_vm *vm;
+
+ /** @lock: Lock protecting access to @xa. */
+ struct rw_semaphore lock;
+
+ /** @xa: Array storing panthor_heap objects. */
+ struct xarray xa;
+
+ /** @gpu_contexts: Buffer object containing the GPU heap contexts. */
+ struct panthor_kernel_bo *gpu_contexts;
+};
+
+static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
+{
+ u32 l2_features = ptdev->gpu_info.l2_features;
+ u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);
+
+ return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
+}
+
+static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
+{
+ return panthor_heap_ctx_stride(pool->ptdev) * id;
+}
+
+static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
+{
+ return pool->gpu_contexts->kmap +
+ panthor_get_heap_ctx_offset(pool, id);
+}
+
+static void panthor_free_heap_chunk(struct panthor_vm *vm,
+ struct panthor_heap *heap,
+ struct panthor_heap_chunk *chunk)
+{
+ mutex_lock(&heap->lock);
+ list_del(&chunk->node);
+ heap->chunk_count--;
+ mutex_unlock(&heap->lock);
+
+ panthor_kernel_bo_destroy(vm, chunk->bo);
+ kfree(chunk);
+}
+
+static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
+ struct panthor_vm *vm,
+ struct panthor_heap *heap,
+ bool initial_chunk)
+{
+ struct panthor_heap_chunk *chunk;
+ struct panthor_heap_chunk_header *hdr;
+ int ret;
+
+ chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
+ if (!chunk)
+ return -ENOMEM;
+
+ chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size,
+ DRM_PANTHOR_BO_NO_MMAP,
+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
+ PANTHOR_VM_KERNEL_AUTO_VA);
+ if (IS_ERR(chunk->bo)) {
+ ret = PTR_ERR(chunk->bo);
+ goto err_free_chunk;
+ }
+
+ ret = panthor_kernel_bo_vmap(chunk->bo);
+ if (ret)
+ goto err_destroy_bo;
+
+ hdr = chunk->bo->kmap;
+ memset(hdr, 0, sizeof(*hdr));
+
+ if (initial_chunk && !list_empty(&heap->chunks)) {
+ struct panthor_heap_chunk *prev_chunk;
+ u64 prev_gpuva;
+
+ prev_chunk = list_first_entry(&heap->chunks,
+ struct panthor_heap_chunk,
+ node);
+
+ prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
+ hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
+ (heap->chunk_size >> 12);
+ }
+
+ panthor_kernel_bo_vunmap(chunk->bo);
+
+ mutex_lock(&heap->lock);
+ list_add(&chunk->node, &heap->chunks);
+ heap->chunk_count++;
+ mutex_unlock(&heap->lock);
+
+ return 0;
+
+err_destroy_bo:
+ panthor_kernel_bo_destroy(vm, chunk->bo);
+
+err_free_chunk:
+ kfree(chunk);
+
+ return ret;
+}
+
+static void panthor_free_heap_chunks(struct panthor_vm *vm,
+ struct panthor_heap *heap)
+{
+ struct panthor_heap_chunk *chunk, *tmp;
+
+ list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
+ panthor_free_heap_chunk(vm, heap, chunk);
+}
+
+static int panthor_alloc_heap_chunks(struct panthor_device *ptdev,
+ struct panthor_vm *vm,
+ struct panthor_heap *heap,
+ u32 chunk_count)
+{
+ int ret;
+ u32 i;
+
+ for (i = 0; i < chunk_count; i++) {
+ ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
+{
+ struct panthor_heap *heap;
+
+ heap = xa_erase(&pool->xa, handle);
+ if (!heap)
+ return -EINVAL;
+
+ panthor_free_heap_chunks(pool->vm, heap);
+ mutex_destroy(&heap->lock);
+ kfree(heap);
+ return 0;
+}
+
+/**
+ * panthor_heap_destroy() - Destroy a heap context
+ * @pool: Pool this context belongs to.
+ * @handle: Handle returned by panthor_heap_create().
+ */
+int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
+{
+ int ret;
+
+ down_write(&pool->lock);
+ ret = panthor_heap_destroy_locked(pool, handle);
+ up_write(&pool->lock);
+
+ return ret;
+}
+
+/**
+ * panthor_heap_create() - Create a heap context
+ * @pool: Pool to instantiate the heap context from.
+ * @initial_chunk_count: Number of chunk allocated at initialization time.
+ * Must be at least 1.
+ * @chunk_size: The size of each chunk. Must be a power of two between 256k
+ * and 2M.
+ * @max_chunks: Maximum number of chunks that can be allocated.
+ * @target_in_flight: Maximum number of in-flight render passes.
+ * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
+ * context.
+ * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
+ * assigned to the heap context.
+ *
+ * Return: a positive handle on success, a negative error otherwise.
+ */
+int panthor_heap_create(struct panthor_heap_pool *pool,
+ u32 initial_chunk_count,
+ u32 chunk_size,
+ u32 max_chunks,
+ u32 target_in_flight,
+ u64 *heap_ctx_gpu_va,
+ u64 *first_chunk_gpu_va)
+{
+ struct panthor_heap *heap;
+ struct panthor_heap_chunk *first_chunk;
+ struct panthor_vm *vm;
+ int ret = 0;
+ u32 id;
+
+ if (initial_chunk_count == 0)
+ return -EINVAL;
+
+ if (hweight32(chunk_size) != 1 ||
+ chunk_size < SZ_256K || chunk_size > SZ_2M)
+ return -EINVAL;
+
+ down_read(&pool->lock);
+ vm = panthor_vm_get(pool->vm);
+ up_read(&pool->lock);
+
+ /* The pool has been destroyed, we can't create a new heap. */
+ if (!vm)
+ return -EINVAL;
+
+ heap = kzalloc(sizeof(*heap), GFP_KERNEL);
+ if (!heap) {
+ ret = -ENOMEM;
+ goto err_put_vm;
+ }
+
+ mutex_init(&heap->lock);
+ INIT_LIST_HEAD(&heap->chunks);
+ heap->chunk_size = chunk_size;
+ heap->max_chunks = max_chunks;
+ heap->target_in_flight = target_in_flight;
+
+ ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap,
+ initial_chunk_count);
+ if (ret)
+ goto err_free_heap;
+
+ first_chunk = list_first_entry(&heap->chunks,
+ struct panthor_heap_chunk,
+ node);
+ *first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);
+
+ down_write(&pool->lock);
+ /* The pool has been destroyed, we can't create a new heap. */
+ if (!pool->vm) {
+ ret = -EINVAL;
+ } else {
+ ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(1, MAX_HEAPS_PER_POOL), GFP_KERNEL);
+ if (!ret) {
+ void *gpu_ctx = panthor_get_heap_ctx(pool, id);
+
+ memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
+ *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
+ panthor_get_heap_ctx_offset(pool, id);
+ }
+ }
+ up_write(&pool->lock);
+
+ if (ret)
+ goto err_free_heap;
+
+ panthor_vm_put(vm);
+ return id;
+
+err_free_heap:
+ panthor_free_heap_chunks(pool->vm, heap);
+ mutex_destroy(&heap->lock);
+ kfree(heap);
+
+err_put_vm:
+ panthor_vm_put(vm);
+ return ret;
+}
+
+/**
+ * panthor_heap_return_chunk() - Return an unused heap chunk
+ * @pool: The pool this heap belongs to.
+ * @heap_gpu_va: The GPU address of the heap context.
+ * @chunk_gpu_va: The chunk VA to return.
+ *
+ * This function is used when a chunk allocated with panthor_heap_grow()
+ * couldn't be linked to the heap context through the FW interface because
+ * the group requesting the allocation was scheduled out in the meantime.
+ */
+int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
+ u64 heap_gpu_va,
+ u64 chunk_gpu_va)
+{
+ u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
+ u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
+ struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
+ struct panthor_heap *heap;
+ int ret;
+
+ if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
+ return -EINVAL;
+
+ down_read(&pool->lock);
+ heap = xa_load(&pool->xa, heap_id);
+ if (!heap) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ chunk_gpu_va &= GENMASK_ULL(63, 12);
+
+ mutex_lock(&heap->lock);
+ list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
+ if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
+ removed = chunk;
+ list_del(&chunk->node);
+ heap->chunk_count--;
+ break;
+ }
+ }
+ mutex_unlock(&heap->lock);
+
+ if (removed) {
+ panthor_kernel_bo_destroy(pool->vm, chunk->bo);
+ kfree(chunk);
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+
+out_unlock:
+ up_read(&pool->lock);
+ return ret;
+}
+
+/**
+ * panthor_heap_grow() - Make a heap context grow.
+ * @pool: The pool this heap belongs to.
+ * @heap_gpu_va: The GPU address of the heap context.
+ * @renderpasses_in_flight: Number of render passes currently in-flight.
+ * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
+ * @new_chunk_gpu_va: Pointer used to return the chunk VA.
+ */
+int panthor_heap_grow(struct panthor_heap_pool *pool,
+ u64 heap_gpu_va,
+ u32 renderpasses_in_flight,
+ u32 pending_frag_count,
+ u64 *new_chunk_gpu_va)
+{
+ u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
+ u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
+ struct panthor_heap_chunk *chunk;
+ struct panthor_heap *heap;
+ int ret;
+
+ if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
+ return -EINVAL;
+
+ down_read(&pool->lock);
+ heap = xa_load(&pool->xa, heap_id);
+ if (!heap) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* If we reached the target in-flight render passes, or if we
+ * reached the maximum number of chunks, let the FW figure another way to
+ * find some memory (wait for render passes to finish, or call the exception
+ * handler provided by the userspace driver, if any).
+ */
+ if (renderpasses_in_flight > heap->target_in_flight ||
+ (pending_frag_count > 0 && heap->chunk_count >= heap->max_chunks)) {
+ ret = -EBUSY;
+ goto out_unlock;
+ } else if (heap->chunk_count >= heap->max_chunks) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
+ * which goes through the blocking allocation path. Ultimately, we
+ * want a non-blocking allocation, so we can immediately report to the
+ * FW when the system is running out of memory. In that case, the FW
+ * can call a user-provided exception handler, which might try to free
+ * some tiler memory by issuing an intermediate fragment job. If the
+ * exception handler can't do anything, it will flag the queue as
+ * faulty so the job that triggered this tiler chunk allocation and all
+ * further jobs in this queue fail immediately instead of having to
+ * wait for the job timeout.
+ */
+ ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false);
+ if (ret)
+ goto out_unlock;
+
+ chunk = list_first_entry(&heap->chunks,
+ struct panthor_heap_chunk,
+ node);
+ *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
+ (heap->chunk_size >> 12);
+ ret = 0;
+
+out_unlock:
+ up_read(&pool->lock);
+ return ret;
+}
+
+static void panthor_heap_pool_release(struct kref *refcount)
+{
+ struct panthor_heap_pool *pool =
+ container_of(refcount, struct panthor_heap_pool, refcount);
+
+ xa_destroy(&pool->xa);
+ kfree(pool);
+}
+
+/**
+ * panthor_heap_pool_put() - Release a heap pool reference
+ * @pool: Pool to release the reference on. Can be NULL.
+ */
+void panthor_heap_pool_put(struct panthor_heap_pool *pool)
+{
+ if (pool)
+ kref_put(&pool->refcount, panthor_heap_pool_release);
+}
+
+/**
+ * panthor_heap_pool_get() - Get a heap pool reference
+ * @pool: Pool to get the reference on. Can be NULL.
+ *
+ * Return: @pool.
+ */
+struct panthor_heap_pool *
+panthor_heap_pool_get(struct panthor_heap_pool *pool)
+{
+ if (pool)
+ kref_get(&pool->refcount);
+
+ return pool;
+}
+
+/**
+ * panthor_heap_pool_create() - Create a heap pool
+ * @ptdev: Device.
+ * @vm: The VM this heap pool will be attached to.
+ *
+ * Heap pools might contain up to 128 heap contexts, and are per-VM.
+ *
+ * Return: A valid pointer on success, a negative error code otherwise.
+ */
+struct panthor_heap_pool *
+panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
+{
+ size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
+ panthor_heap_ctx_stride(ptdev),
+ 4096);
+ struct panthor_heap_pool *pool;
+ int ret = 0;
+
+ pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return ERR_PTR(-ENOMEM);
+
+ /* We want a weak ref here: the heap pool belongs to the VM, so we're
+ * sure that, as long as the heap pool exists, the VM exists too.
+ */
+ pool->vm = vm;
+ pool->ptdev = ptdev;
+ init_rwsem(&pool->lock);
+ xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1);
+ kref_init(&pool->refcount);
+
+ pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
+ DRM_PANTHOR_BO_NO_MMAP,
+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
+ PANTHOR_VM_KERNEL_AUTO_VA);
+ if (IS_ERR(pool->gpu_contexts)) {
+ ret = PTR_ERR(pool->gpu_contexts);
+ goto err_destroy_pool;
+ }
+
+ ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
+ if (ret)
+ goto err_destroy_pool;
+
+ return pool;
+
+err_destroy_pool:
+ panthor_heap_pool_destroy(pool);
+ return ERR_PTR(ret);
+}
+
+/**
+ * panthor_heap_pool_destroy() - Destroy a heap pool.
+ * @pool: Pool to destroy.
+ *
+ * This function destroys all heap contexts and their resources, thus
+ * preventing any use of the heap contexts or the chunks attached to them
+ * after that point.
+ *
+ * If the GPU still has access to some heap contexts, a fault should be
+ * triggered, which should flag the command stream groups using these
+ * context as faulty.
+ *
+ * The heap pool object is only released when all references to this pool
+ * are released.
+ */
+void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
+{
+ struct panthor_heap *heap;
+ unsigned long i;
+
+ if (!pool)
+ return;
+
+ down_write(&pool->lock);
+ xa_for_each(&pool->xa, i, heap)
+ drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));
+
+ if (!IS_ERR_OR_NULL(pool->gpu_contexts))
+ panthor_kernel_bo_destroy(pool->vm, pool->gpu_contexts);
+
+ /* Reflects the fact the pool has been destroyed. */
+ pool->vm = NULL;
+ up_write(&pool->lock);
+
+ panthor_heap_pool_put(pool);
+}
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_heap.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2023 Collabora ltd. */
+
+#ifndef __PANTHOR_HEAP_H__
+#define __PANTHOR_HEAP_H__
+
+#include <linux/types.h>
+
+struct panthor_device;
+struct panthor_heap_pool;
+struct panthor_vm;
+
+int panthor_heap_create(struct panthor_heap_pool *pool,
+ u32 initial_chunk_count,
+ u32 chunk_size,
+ u32 max_chunks,
+ u32 target_in_flight,
+ u64 *heap_ctx_gpu_va,
+ u64 *first_chunk_gpu_va);
+int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle);
+
+struct panthor_heap_pool *
+panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm);
+void panthor_heap_pool_destroy(struct panthor_heap_pool *pool);
+
+struct panthor_heap_pool *
+panthor_heap_pool_get(struct panthor_heap_pool *pool);
+void panthor_heap_pool_put(struct panthor_heap_pool *pool);
+
+int panthor_heap_grow(struct panthor_heap_pool *pool,
+ u64 heap_gpu_va,
+ u32 renderpasses_in_flight,
+ u32 pending_frag_count,
+ u64 *new_chunk_gpu_va);
+int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
+ u64 heap_gpu_va,
+ u64 chunk_gpu_va);
+
+#endif

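Both panthor_heap_chunk_header::next and the value returned through new_chunk_gpu_va use the same packed encoding: chunk GPU VAs are at least 4 KiB aligned, so bits [11:0] are reused to carry the chunk size in 4 KiB units. A worked example (the address is made up for illustration):

u64 chunk_va   = 0x0000004000200000ull;	/* 4 KiB-aligned GPU VA */
u32 chunk_size = SZ_2M;			/* largest allowed chunk size */
u64 packed     = (chunk_va & GENMASK_ULL(63, 12)) | (chunk_size >> 12);

/*
 * packed == 0x0000004000200200: the VA sits in bits [63:12], the size
 * in 4 KiB pages (SZ_2M >> 12 == 512 == 0x200) in bits [11:0].
 */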
View File

@ -1,104 +0,0 @@
From d72f049087d4f973f6332b599c92177e718107de Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:26 +0100
Subject: [PATCH] drm/panthor: Allow driver compilation

Now that all blocks are available, we can add/update Kconfig/Makefile
files to allow compilation.
v6:
- Add Maxime's and Heiko's acks
- Keep source files alphabetically ordered in the Makefile
v4:
- Add Steve's R-b
v3:
- Add a dep on DRM_GPUVM
- Fix dependencies in Kconfig
- Expand help text to (hopefully) describe which GPUs are to be
supported by this driver and which are for panfrost.
Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-13-boris.brezillon@collabora.com
---
drivers/gpu/drm/Kconfig | 2 ++
drivers/gpu/drm/Makefile | 1 +
drivers/gpu/drm/panthor/Kconfig | 23 +++++++++++++++++++++++
drivers/gpu/drm/panthor/Makefile | 14 ++++++++++++++
4 files changed, 40 insertions(+)
create mode 100644 drivers/gpu/drm/panthor/Kconfig
create mode 100644 drivers/gpu/drm/panthor/Makefile
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -381,6 +381,8 @@ source "drivers/gpu/drm/lima/Kconfig"
source "drivers/gpu/drm/panfrost/Kconfig"
+source "drivers/gpu/drm/panthor/Kconfig"
+
source "drivers/gpu/drm/aspeed/Kconfig"
source "drivers/gpu/drm/mcde/Kconfig"
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -189,6 +189,7 @@ obj-$(CONFIG_DRM_XEN) += xen/
obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/
obj-$(CONFIG_DRM_LIMA) += lima/
obj-$(CONFIG_DRM_PANFROST) += panfrost/
+obj-$(CONFIG_DRM_PANTHOR) += panthor/
obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
obj-$(CONFIG_DRM_MCDE) += mcde/
obj-$(CONFIG_DRM_TIDSS) += tidss/
--- /dev/null
+++ b/drivers/gpu/drm/panthor/Kconfig
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0 or MIT
+
+config DRM_PANTHOR
+ tristate "Panthor (DRM support for ARM Mali CSF-based GPUs)"
+ depends on DRM
+ depends on ARM || ARM64 || COMPILE_TEST
+ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
+ depends on MMU
+ select DEVFREQ_GOV_SIMPLE_ONDEMAND
+ select DRM_EXEC
+ select DRM_GEM_SHMEM_HELPER
+ select DRM_GPUVM
+ select DRM_SCHED
+ select IOMMU_IO_PGTABLE_LPAE
+ select IOMMU_SUPPORT
+ select PM_DEVFREQ
+ help
+ DRM driver for ARM Mali CSF-based GPUs.
+
+ This driver is for Mali (or Immortalis) Valhall Gxxx GPUs.
+
+ Note that the Mali-G68 and Mali-G78, while Valhall architecture, will
+ be supported with the panfrost driver as they are not CSF GPUs.
--- /dev/null
+++ b/drivers/gpu/drm/panthor/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0 or MIT
+
+panthor-y := \
+ panthor_devfreq.o \
+ panthor_device.o \
+ panthor_drv.o \
+ panthor_fw.o \
+ panthor_gem.o \
+ panthor_gpu.o \
+ panthor_heap.o \
+ panthor_mmu.o \
+ panthor_sched.o
+
+obj-$(CONFIG_DRM_PANTHOR) += panthor.o

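With these files in place, the driver builds like any other DRM module; an illustrative config fragment to enable it would be:

CONFIG_DRM=y
CONFIG_DRM_PANTHOR=m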
View File

@ -1,27 +0,0 @@
From 9c86b03863844ce69f99aa66404c79492ec9e208 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Mon, 4 Mar 2024 10:08:10 +0100
Subject: [PATCH] drm/panthor: Fix panthor_devfreq kerneldoc

Missing '*' to have a valid kerneldoc prefix.
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202403031019.6jvrOqGT-lkp@intel.com/
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240304090812.3941084-2-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_devfreq.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/gpu/drm/panthor/panthor_devfreq.c
+++ b/drivers/gpu/drm/panthor/panthor_devfreq.c
@@ -34,7 +34,7 @@ struct panthor_devfreq {
/** @last_busy_state: True if the GPU was busy last time we updated the state. */
bool last_busy_state;
- /*
+ /**
* @lock: Lock used to protect busy_time, idle_time, time_last_update and
* last_busy_state.
*

View File

@ -1,31 +0,0 @@
From eb1dc10a6ee3559310436ab62db93b72310a2a18 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Mon, 4 Mar 2024 10:08:11 +0100
Subject: [PATCH] drm/panthor: Explicitly include mm.h for the {virt,
__phys}_to_pfn() defs

Something on arm[64] must be including <asm/page.h>, but things fail
to compile on sparc64. Make sure this header is included (through
linux/mm.h) so this driver can be compile-tested on all supported
architectures.
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202403031142.Vl4pW7X6-lkp@intel.com/
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240304090812.3941084-3-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_device.c | 1 +
1 file changed, 1 insertion(+)
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -4,6 +4,7 @@
/* Copyright 2023 Collabora ltd. */
#include <linux/clk.h>
+#include <linux/mm.h>
#include <linux/platform_device.h>
#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>

View File

@ -1,75 +0,0 @@
From 0cd8363ed802922e39446d783f767b3e09335ddc Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Mon, 18 Mar 2024 16:31:17 +0100
Subject: [PATCH] drm/panthor: Fix the CONFIG_PM=n case

Putting a hard dependency on CONFIG_PM is not possible because of a
circular dependency issue, and it's actually not desirable either. In
order to support this use case, we forcibly resume at init time, and
suspend at unplug time.
v2:
- Drop the #ifdef CONFIG_PM section around panthor_pm_ops's definition
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202403031944.EOimQ8WK-lkp@intel.com/
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240318153117.1321544-1-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_device.c | 13 +++++++++++--
drivers/gpu/drm/panthor/panthor_drv.c | 2 +-
2 files changed, 12 insertions(+), 3 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -87,6 +87,10 @@ void panthor_device_unplug(struct pantho
pm_runtime_dont_use_autosuspend(ptdev->base.dev);
pm_runtime_put_sync_suspend(ptdev->base.dev);
+ /* If PM is disabled, we need to call the suspend handler manually. */
+ if (!IS_ENABLED(CONFIG_PM))
+ panthor_device_suspend(ptdev->base.dev);
+
/* Report the unplug operation as done to unblock concurrent
* panthor_device_unplug() callers.
*/
@@ -218,6 +222,13 @@ int panthor_device_init(struct panthor_d
if (ret)
return ret;
+ /* If PM is disabled, we need to call panthor_device_resume() manually. */
+ if (!IS_ENABLED(CONFIG_PM)) {
+ ret = panthor_device_resume(ptdev->base.dev);
+ if (ret)
+ return ret;
+ }
+
ret = panthor_gpu_init(ptdev);
if (ret)
goto err_rpm_put;
@@ -402,7 +413,6 @@ int panthor_device_mmap_io(struct pantho
return 0;
}
-#ifdef CONFIG_PM
int panthor_device_resume(struct device *dev)
{
struct panthor_device *ptdev = dev_get_drvdata(dev);
@@ -547,4 +557,3 @@ err_set_active:
mutex_unlock(&ptdev->pm.mmio_lock);
return ret;
}
-#endif
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1416,7 +1416,7 @@ static struct platform_driver panthor_dr
.remove_new = panthor_remove,
.driver = {
.name = "panthor",
- .pm = &panthor_pm_ops,
+ .pm = pm_ptr(&panthor_pm_ops),
.of_match_table = dt_match,
},
};

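pm_ptr() is what keeps this working on both configurations: it evaluates to the ops pointer when CONFIG_PM is enabled and to NULL otherwise (a sketch of the helper's definition in <linux/pm.h>):

#define pm_ptr(_ptr)	PTR_IF(IS_ENABLED(CONFIG_PM), (_ptr))

/*
 * With CONFIG_PM=n no PM ops get registered, so the manual
 * panthor_device_resume()/panthor_device_suspend() calls added above
 * become the only suspend/resume entry points.
 */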
View File

@ -1,74 +0,0 @@
From 0b45921c2a8831834a5f8a52ddd0b25b5b1c6faf Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Mon, 18 Mar 2024 14:51:19 +0000
Subject: [PATCH] drm/panthor: Don't use virt_to_pfn()

virt_to_pfn() isn't available on x86 (except to xen) so breaks
COMPILE_TEST builds. Avoid its use completely by instead storing the
struct page pointer allocated in panthor_device_init() and using
page_to_pfn() instead.
Signed-off-by: Steven Price <steven.price@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240318145119.368582-1-steven.price@arm.com
---
drivers/gpu/drm/panthor/panthor_device.c | 10 ++++++----
drivers/gpu/drm/panthor/panthor_device.h | 2 +-
2 files changed, 7 insertions(+), 5 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -151,11 +151,12 @@ static bool panthor_device_is_initialize
static void panthor_device_free_page(struct drm_device *ddev, void *data)
{
- free_page((unsigned long)data);
+ __free_page(data);
}
int panthor_device_init(struct panthor_device *ptdev)
{
+ u32 *dummy_page_virt;
struct resource *res;
struct page *p;
int ret;
@@ -176,7 +177,8 @@ int panthor_device_init(struct panthor_d
if (!p)
return -ENOMEM;
- ptdev->pm.dummy_latest_flush = page_address(p);
+ ptdev->pm.dummy_latest_flush = p;
+ dummy_page_virt = page_address(p);
ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page,
ptdev->pm.dummy_latest_flush);
if (ret)
@@ -188,7 +190,7 @@ int panthor_device_init(struct panthor_d
* happens while the dummy page is mapped. Zero cannot be used because
* that means 'always flush'.
*/
- *ptdev->pm.dummy_latest_flush = 1;
+ *dummy_page_virt = 1;
INIT_WORK(&ptdev->reset.work, panthor_device_reset_work);
ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0);
@@ -364,7 +366,7 @@ static vm_fault_t panthor_mmio_vm_fault(
if (active)
pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID);
else
- pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush);
+ pfn = page_to_pfn(ptdev->pm.dummy_latest_flush);
break;
default:
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -160,7 +160,7 @@ struct panthor_device {
* Used to replace the real LATEST_FLUSH page when the GPU
* is suspended.
*/
- u32 *dummy_latest_flush;
+ struct page *dummy_latest_flush;
} pm;
};

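The resulting pattern is the portable one: keep the struct page around and derive both the CPU address and the PFN from it. A condensed sketch of what the patch ends up doing:

struct page *p = alloc_page(GFP_KERNEL | __GFP_ZERO);
unsigned long pfn;
u32 *virt;

if (!p)
	return -ENOMEM;

virt = page_address(p);	/* CPU view, only needed at init time */
*virt = 1;		/* dummy LATEST_FLUSH value; 0 means 'always flush' */
pfn = page_to_pfn(p);	/* works on every arch, unlike virt_to_pfn() */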
View File

@ -1,26 +0,0 @@
From 9d1848778e56fb565db041e4237a2f27f9277f63 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 26 Mar 2024 10:02:19 +0000
Subject: [PATCH] drm/panthor: Fix spelling mistake "readyness" -> "readiness"

There is a spelling mistake in a drm_err message. Fix it.
Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Acked-by: Liviu Dudau <liviu.dudau@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240326100219.43989-1-colin.i.king@gmail.com
---
drivers/gpu/drm/panthor/panthor_gpu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/gpu/drm/panthor/panthor_gpu.c
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -333,7 +333,7 @@ int panthor_gpu_block_power_on(struct pa
val, (mask32 & val) == mask32,
100, timeout_us);
if (ret) {
- drm_err(&ptdev->base, "timeout waiting on %s:%llx readyness",
+ drm_err(&ptdev->base, "timeout waiting on %s:%llx readiness",
blk_name, mask);
return ret;
}

View File

@ -1,142 +0,0 @@
From 11f0275cc1b90b4b9bf37a5ebc27c0a9b2451b4e Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Tue, 26 Mar 2024 12:12:03 +0100
Subject: [PATCH] drm/panthor: Fix IO-page mmap() for 32-bit userspace on
64-bit kernel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When mapping an IO region, the pseudo-file offset is dependent on the
userspace architecture. panthor_device_mmio_offset() abstracts that
away for us by turning a userspace MMIO offset into its kernel
equivalent, but we were not updating vm_area_struct::vm_pgoff
accordingly, leading us to attach the MMIO region to the wrong file
offset.
This has implications when we start mixing 64-bit and 32-bit apps, but
that's only really a problem when we start having more than 2^43 bytes of
memory allocated, which is very unlikely to happen.
What's more problematic is the fact this turns our
unmap_mapping_range(DRM_PANTHOR_USER_MMIO_OFFSET) calls, which are
supposed to kill the MMIO mapping when entering suspend, into NOPs.
Which means we either keep the dummy flush_id mapping active at all
times, or we risk a BUS_FAULT if the MMIO region was mapped, and the
GPU is suspended after that.
Solve that by patching vm_pgoff early in panthor_mmap(). With
this in place, we no longer need the panthor_device_mmio_offset()
helper.
v3:
- No changes
v2:
- Kill panthor_device_mmio_offset()
Fixes: 5fe909cae118 ("drm/panthor: Add the device logical block")
Reported-by: Adrián Larumbe <adrian.larumbe@collabora.com>
Reported-by: Lukas F. Hartmann <lukas@mntmn.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10835
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240326111205.510019-1-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_device.c | 8 ++++----
drivers/gpu/drm/panthor/panthor_device.h | 24 ------------------------
drivers/gpu/drm/panthor/panthor_drv.c | 17 ++++++++++++++++-
3 files changed, 20 insertions(+), 29 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -348,7 +348,7 @@ static vm_fault_t panthor_mmio_vm_fault(
{
struct vm_area_struct *vma = vmf->vma;
struct panthor_device *ptdev = vma->vm_private_data;
- u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT;
+ u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT;
unsigned long pfn;
pgprot_t pgprot;
vm_fault_t ret;
@@ -361,7 +361,7 @@ static vm_fault_t panthor_mmio_vm_fault(
mutex_lock(&ptdev->pm.mmio_lock);
active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE;
- switch (panthor_device_mmio_offset(id)) {
+ switch (offset) {
case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
if (active)
pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID);
@@ -392,9 +392,9 @@ static const struct vm_operations_struct
int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma)
{
- u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT;
+ u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT;
- switch (panthor_device_mmio_offset(id)) {
+ switch (offset) {
case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
if (vma->vm_end - vma->vm_start != PAGE_SIZE ||
(vma->vm_flags & (VM_WRITE | VM_EXEC)))
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -365,30 +365,6 @@ static int panthor_request_ ## __name ##
pirq); \
}
-/**
- * panthor_device_mmio_offset() - Turn a user MMIO offset into a kernel one
- * @offset: Offset to convert.
- *
- * With 32-bit systems being limited by the 32-bit representation of mmap2's
- * pgoffset field, we need to make the MMIO offset arch specific. This function
- * converts a user MMIO offset into something the kernel driver understands.
- *
- * If the kernel and userspace architecture match, the offset is unchanged. If
- * the kernel is 64-bit and userspace is 32-bit, the offset is adjusted to match
- * 64-bit offsets. 32-bit kernel with 64-bit userspace is impossible.
- *
- * Return: Adjusted offset.
- */
-static inline u64 panthor_device_mmio_offset(u64 offset)
-{
-#ifdef CONFIG_ARM64
- if (test_tsk_thread_flag(current, TIF_32BIT))
- offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT - DRM_PANTHOR_USER_MMIO_OFFSET_32BIT;
-#endif
-
- return offset;
-}
-
extern struct workqueue_struct *panthor_cleanup_wq;
#endif
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1326,7 +1326,22 @@ static int panthor_mmap(struct file *fil
if (!drm_dev_enter(file->minor->dev, &cookie))
return -ENODEV;
- if (panthor_device_mmio_offset(offset) >= DRM_PANTHOR_USER_MMIO_OFFSET)
+#ifdef CONFIG_ARM64
+ /*
+ * With 32-bit systems being limited by the 32-bit representation of
+ * mmap2's pgoffset field, we need to make the MMIO offset arch
+ * specific. This converts a user MMIO offset into something the kernel
+ * driver understands.
+ */
+ if (test_tsk_thread_flag(current, TIF_32BIT) &&
+ offset >= DRM_PANTHOR_USER_MMIO_OFFSET_32BIT) {
+ offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT -
+ DRM_PANTHOR_USER_MMIO_OFFSET_32BIT;
+ vma->vm_pgoff = offset >> PAGE_SHIFT;
+ }
+#endif
+
+ if (offset >= DRM_PANTHOR_USER_MMIO_OFFSET)
ret = panthor_device_mmap_io(ptdev, vma);
else
ret = drm_gem_mmap(filp, vma);

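To make the fix concrete, assuming the uAPI constants implied by the 2^43 figure above (DRM_PANTHOR_USER_MMIO_OFFSET_32BIT == 1ull << 43, DRM_PANTHOR_USER_MMIO_OFFSET_64BIT == 1ull << 56), a 32-bit process mapping an MMIO page now goes through this translation:

u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT;	/* 1ull << 43 from userspace */

offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT -
	  DRM_PANTHOR_USER_MMIO_OFFSET_32BIT;	/* now 1ull << 56 */
vma->vm_pgoff = offset >> PAGE_SHIFT;		/* the write-back this patch adds */

/*
 * unmap_mapping_range(..., DRM_PANTHOR_USER_MMIO_OFFSET, ...) at suspend
 * time now finds this VMA, since it is filed under the 64-bit offset.
 */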
View File

@ -1,55 +0,0 @@
From 1de434e0b2757061b09b347264f1ff5bdf996e58 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Tue, 26 Mar 2024 12:12:04 +0100
Subject: [PATCH] drm/panthor: Fix ordering in _irq_suspend()

Make sure we set suspended=true last to avoid generating an irq storm
in the unlikely case where an IRQ happens between the suspended=true
assignment and the _INT_MASK update.
We also move the mask=0 assignment before writing to the _INT_MASK
register to prevent the thread handler from unmasking the interrupt
behind our back. This means we might lose events if there were some
pending when we get to suspend the IRQ, but that's fine.
The synchronize_irq() we have in the _irq_suspend() path was not
there to make sure all IRQs are processed, just to make sure we don't
have register accesses coming from the irq handlers after
_irq_suspend() has been called. If there's a need to have all pending
IRQs processed, it should happen before _irq_suspend() is called.
v3:
- Add Steve's R-b
v2:
- New patch
Fixes: 5fe909cae118 ("drm/panthor: Add the device logical block")
Reported-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240326111205.510019-2-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_device.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -325,7 +325,7 @@ static inline void panthor_ ## __name ##
{ \
int cookie; \
\
- atomic_set(&pirq->suspended, true); \
+ pirq->mask = 0; \
\
if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \
gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \
@@ -333,7 +333,7 @@ static inline void panthor_ ## __name ##
drm_dev_exit(cookie); \
} \
\
- pirq->mask = 0; \
+ atomic_set(&pirq->suspended, true); \
} \
\
static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask) \

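A sketch of the two races this reordering closes. The handler below paraphrases the driver's IRQ handler macro, with REG_INT_MASK standing in for the per-block __reg_prefix ## _INT_MASK register; it is not the verbatim driver code:

/* Threaded-handler tail: re-arm the interrupt unless suspended. */
if (!atomic_read(&pirq->suspended))
	gpu_write(pirq->ptdev, REG_INT_MASK, pirq->mask);

/* With the old order (suspended=true, then _INT_MASK=0, then mask=0):
 *  - a hard IRQ landing after suspended=true but before the mask write
 *    is never acked, so a level-triggered line immediately re-fires:
 *    the irq storm described above;
 *  - a threaded handler racing the _INT_MASK=0 write can re-arm with
 *    the still-nonzero pirq->mask.
 * Clearing pirq->mask first turns any racing re-arm into a write of 0,
 * and setting suspended=true last closes the storm window. */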
View File

@ -1,56 +0,0 @@
From 962f88b9c91647f3ff4a0d3709662641baed5164 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Tue, 26 Mar 2024 12:12:05 +0100
Subject: [PATCH] drm/panthor: Drop the dev_enter/exit() sections in
_irq_suspend/resume()
There's no reason for _irq_suspend/resume() to be called after the
device has been unplugged, and keeping this dev_enter/exit()
section in _irq_suspend() turns _irq_suspend() into a NOP
when called from the _unplug() functions, which we don't want.
v3:
- New patch
Fixes: 5fe909cae118 ("drm/panthor: Add the device logical block")
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240326111205.510019-3-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_device.h | 17 ++++-------------
1 file changed, 4 insertions(+), 13 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -326,13 +326,8 @@ static inline void panthor_ ## __name ##
int cookie; \
\
pirq->mask = 0; \
- \
- if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \
- gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \
- synchronize_irq(pirq->irq); \
- drm_dev_exit(cookie); \
- } \
- \
+ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \
+ synchronize_irq(pirq->irq); \
atomic_set(&pirq->suspended, true); \
} \
\
@@ -342,12 +337,8 @@ static inline void panthor_ ## __name ##
\
atomic_set(&pirq->suspended, false); \
pirq->mask = mask; \
- \
- if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \
- gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \
- gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask); \
- drm_dev_exit(cookie); \
- } \
+ gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \
+ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask); \
} \
\
static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \

View File

@ -1,46 +0,0 @@
From d76653c32dd16d78e56208b4819134e766257c06 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Thu, 28 Mar 2024 09:22:07 -0700
Subject: [PATCH] drm/panthor: Fix clang -Wunused-but-set-variable in
tick_ctx_apply()
Clang warns (or errors with CONFIG_WERROR):
drivers/gpu/drm/panthor/panthor_sched.c:2048:6: error: variable 'csg_mod_mask' set but not used [-Werror,-Wunused-but-set-variable]
 2048 |         u32 csg_mod_mask = 0, free_csg_slots = 0;
      |             ^
1 error generated.
The variable is an artifact left over from refactoring that occurred
during the development of the initial series for this driver. Remove it
to resolve the warning.
Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Justin Stitt <justinstitt@google.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240328-panthor-drop-csg_mod_mask-v1-1-5a80be3df581@kernel.org
---
drivers/gpu/drm/panthor/panthor_sched.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -2045,7 +2045,7 @@ tick_ctx_apply(struct panthor_scheduler
struct panthor_device *ptdev = sched->ptdev;
struct panthor_csg_slot *csg_slot;
int prio, new_csg_prio = MAX_CSG_PRIO, i;
- u32 csg_mod_mask = 0, free_csg_slots = 0;
+ u32 free_csg_slots = 0;
struct panthor_csg_slots_upd_ctx upd_ctx;
int ret;
@@ -2139,7 +2139,6 @@ tick_ctx_apply(struct panthor_scheduler
csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
csg_slot = &sched->csg_slots[csg_id];
- csg_mod_mask |= BIT(csg_id);
group_bind_locked(group, csg_id);
csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--);
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,

View File

@ -1,40 +0,0 @@
From 00044169de061dac8d9da2cf930757c53006adff Mon Sep 17 00:00:00 2001
From: Liviu Dudau <liviu.dudau@arm.com>
Date: Tue, 2 Apr 2024 22:54:22 +0100
Subject: [PATCH] drm/panthor: Cleanup unused variable 'cookie'
Commit 962f88b9c916 ("drm/panthor: Drop the dev_enter/exit() sections in
_irq_suspend/resume()") removed the code that used the 'cookie' variable
but left the declaration in place. Remove it.
Fixes: 962f88b9c916 ("drm/panthor: Drop the dev_enter/exit() sections in _irq_suspend/resume()")
Cc: Boris Brezillon <boris.brezillon@collabora.com>
Cc: Steven Price <steven.price@arm.com>
Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240402215423.360341-1-liviu.dudau@arm.com
---
drivers/gpu/drm/panthor/panthor_device.h | 4 ----
1 file changed, 4 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -323,8 +323,6 @@ static irqreturn_t panthor_ ## __name ##
\
static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \
{ \
- int cookie; \
- \
pirq->mask = 0; \
gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \
synchronize_irq(pirq->irq); \
@@ -333,8 +331,6 @@ static inline void panthor_ ## __name ##
\
static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask) \
{ \
- int cookie; \
- \
atomic_set(&pirq->suspended, false); \
pirq->mask = mask; \
gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \

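The drm_dev_enter()/drm_dev_exit() pair removed here is the generic DRM unplug guard. A short sketch of its semantics, using the same stand-in register name as above, shows why the guarded suspend body degenerated into a NOP:

int cookie;

if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {
	/* Only reached while the device is still registered. */
	gpu_write(pirq->ptdev, REG_INT_MASK, 0);
	drm_dev_exit(cookie);
}
/* Once drm_dev_unplug() has run, drm_dev_enter() returns false and the
 * body is skipped, which is exactly why _irq_suspend() did nothing
 * when called from the _unplug() functions. */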
View File

@ -1,58 +0,0 @@
From be7ffc821f5fc2eb30944562a04901c10892cc7c Mon Sep 17 00:00:00 2001
From: Liviu Dudau <liviu.dudau@arm.com>
Date: Tue, 2 Apr 2024 22:54:23 +0100
Subject: [PATCH] drm/panthor: Fix some kerneldoc warnings
When compiling with W=1 the build process will flag empty comments,
misnamed documented variables and incorrect tagging of functions.
Fix them in one go.
Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block")
Cc: Boris Brezillon <boris.brezillon@collabora.com>
Cc: Steven Price <steven.price@arm.com>
Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240402215423.360341-2-liviu.dudau@arm.com
---
drivers/gpu/drm/panthor/panthor_sched.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -517,7 +517,7 @@ struct panthor_group {
/** @max_compute_cores: Maximum number of shader cores used for compute jobs. */
u8 max_compute_cores;
- /** @max_compute_cores: Maximum number of shader cores used for fragment jobs. */
+ /** @max_fragment_cores: Maximum number of shader cores used for fragment jobs. */
u8 max_fragment_cores;
/** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */
@@ -993,7 +993,7 @@ cs_slot_prog_locked(struct panthor_devic
}
/**
- * @cs_slot_reset_locked() - Reset a queue slot
+ * cs_slot_reset_locked() - Reset a queue slot
* @ptdev: Device.
* @csg_id: Group slot.
* @cs_id: Queue slot.
@@ -1591,7 +1591,7 @@ static void sched_process_idle_event_loc
}
/**
- * panthor_sched_process_global_irq() - Process the scheduling part of a global IRQ
+ * sched_process_global_irq_locked() - Process the scheduling part of a global IRQ
* @ptdev: Device.
*/
static void sched_process_global_irq_locked(struct panthor_device *ptdev)
@@ -1660,8 +1660,6 @@ static const struct dma_fence_ops pantho
.get_timeline_name = queue_fence_get_timeline_name,
};
-/**
- */
struct panthor_csg_slots_upd_ctx {
u32 update_mask;
u32 timedout_mask;

View File

@ -1,42 +0,0 @@
From d33733263a550775c7574169f62bf144f74d8f9a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Tue, 2 Apr 2024 12:58:09 +0300
Subject: [PATCH] drm/panthor: Fix a couple -ENOMEM error codes
These error paths forgot to set the error code to -ENOMEM.
Fixes: 647810ec2476 ("drm/panthor: Add the MMU/VM logical block")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/cf5bbba5-427e-4940-b91e-925f9fa71f8d@moroto.mountain
---
drivers/gpu/drm/panthor/panthor_mmu.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -1264,8 +1264,10 @@ static int panthor_vm_prepare_map_op_ctx
op_ctx->rsvd_page_tables.pages = kcalloc(pt_count,
sizeof(*op_ctx->rsvd_page_tables.pages),
GFP_KERNEL);
- if (!op_ctx->rsvd_page_tables.pages)
+ if (!op_ctx->rsvd_page_tables.pages) {
+ ret = -ENOMEM;
goto err_cleanup;
+ }
ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count,
op_ctx->rsvd_page_tables.pages);
@@ -1318,8 +1320,10 @@ static int panthor_vm_prepare_unmap_op_c
op_ctx->rsvd_page_tables.pages = kcalloc(pt_count,
sizeof(*op_ctx->rsvd_page_tables.pages),
GFP_KERNEL);
- if (!op_ctx->rsvd_page_tables.pages)
+ if (!op_ctx->rsvd_page_tables.pages) {
+ ret = -ENOMEM;
goto err_cleanup;
+ }
ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count,
op_ctx->rsvd_page_tables.pages);

View File

@ -1,33 +0,0 @@
From 99b74db1e27145bdf0afb85559aa70d951569ac3 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Tue, 2 Apr 2024 12:56:19 +0300
Subject: [PATCH] drm/panthor: Fix error code in panthor_gpu_init()
This code accidentally returns zero/success on error because of a typo.
It should be "irq" instead of "ret". The other thing is that if
platform_get_irq_byname() were to return zero then the error code would
be complicated. Fortunately, it does not, so we can just change the
<= 0 check to < 0.
Fixes: 5cd894e258c4 ("drm/panthor: Add the GPU logical block")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/d753e684-43ee-45c2-a1fd-86222da204e1@moroto.mountain
---
drivers/gpu/drm/panthor/panthor_gpu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_gpu.c
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -211,8 +211,8 @@ int panthor_gpu_init(struct panthor_devi
return ret;
irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu");
- if (irq <= 0)
- return ret;
+ if (irq < 0)
+ return irq;
ret = panthor_request_gpu_irq(ptdev, &ptdev->gpu->irq, irq, GPU_INTERRUPTS_MASK);
if (ret)

View File

@ -1,28 +0,0 @@
From 2b5890786014b926f845402ae80ebc71c4bd6d5c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Tue, 2 Apr 2024 12:56:42 +0300
Subject: [PATCH] drm/panthor: Fix off by one in panthor_fw_get_cs_iface()
The ->iface.streams[csg_slot][] array has MAX_CS_PER_CSG elements so
this > comparison needs to be >= to prevent an out of bounds access.
Fixes: 2718d91816ee ("drm/panthor: Add the FW logical block")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/62835c16-c85c-483d-a8fe-63be78d49d15@moroto.mountain
---
drivers/gpu/drm/panthor/panthor_fw.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -308,7 +308,7 @@ panthor_fw_get_csg_iface(struct panthor_
struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
{
- if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot > MAX_CS_PER_CSG))
+ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
return NULL;
return &ptdev->fw->iface.streams[csg_slot][cs_slot];

View File

@ -1,58 +0,0 @@
From 45c734fdd43db14444025910b4c59dd2b8be714a Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Date: Tue, 2 Apr 2024 07:14:11 -0700
Subject: [PATCH] drm/panthor: Don't return NULL from
panthor_vm_get_heap_pool()
The kernel doc says this function returns either a valid pointer
or an ERR_PTR(), but in practice this function can return NULL if
create=false. Fix the function to match the doc (return
ERR_PTR(-ENOENT) instead of NULL) and adjust all call-sites
accordingly.
Fixes: 4bdca1150792 ("drm/panthor: Add the driver frontend block")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240402141412.1707949-1-harshit.m.mogalapalli@oracle.com
---
drivers/gpu/drm/panthor/panthor_drv.c | 4 ++--
drivers/gpu/drm/panthor/panthor_mmu.c | 2 ++
drivers/gpu/drm/panthor/panthor_sched.c | 2 +-
3 files changed, 5 insertions(+), 3 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1089,8 +1089,8 @@ static int panthor_ioctl_tiler_heap_dest
return -EINVAL;
pool = panthor_vm_get_heap_pool(vm, false);
- if (!pool) {
- ret = -EINVAL;
+ if (IS_ERR(pool)) {
+ ret = PTR_ERR(pool);
goto out_put_vm;
}
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -1897,6 +1897,8 @@ struct panthor_heap_pool *panthor_vm_get
vm->heaps.pool = panthor_heap_pool_get(pool);
} else {
pool = panthor_heap_pool_get(vm->heaps.pool);
+ if (!pool)
+ pool = ERR_PTR(-ENOENT);
}
mutex_unlock(&vm->heaps.lock);
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -1343,7 +1343,7 @@ static int group_process_tiler_oom(struc
if (unlikely(csg_id < 0))
return 0;
- if (!heaps || frag_end > vt_end || vt_end >= vt_start) {
+ if (IS_ERR(heaps) || frag_end > vt_end || vt_end >= vt_start) {
ret = -EINVAL;
} else {
/* We do the allocation without holding the scheduler lock to avoid

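With this change every call site can rely on the ERR_PTR() convention uniformly; a condensed caller sketch (error handling collapsed relative to the drv.c hunk above):

pool = panthor_vm_get_heap_pool(vm, false);
if (IS_ERR(pool))
	return PTR_ERR(pool); /* -ENOENT when no pool exists yet */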
View File

@ -1,28 +0,0 @@
From 6e0718f21feda0ed97f932cee39b676817e457f2 Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Date: Tue, 2 Apr 2024 03:40:40 -0700
Subject: [PATCH] drm/panthor: Fix NULL vs IS_ERR() bug in panthor_probe()
The devm_drm_dev_alloc() function returns error pointers.
Update the error handling to check for error pointers instead of NULL.
Fixes: 4bdca1150792 ("drm/panthor: Add the driver frontend block")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240402104041.1689951-1-harshit.m.mogalapalli@oracle.com
---
drivers/gpu/drm/panthor/panthor_drv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1399,7 +1399,7 @@ static int panthor_probe(struct platform
ptdev = devm_drm_dev_alloc(&pdev->dev, &panthor_drm_driver,
struct panthor_device, base);
- if (!ptdev)
+ if (IS_ERR(ptdev))
return -ENOMEM;
platform_set_drvdata(pdev, ptdev);

View File

@ -1,34 +0,0 @@
From a9b7dfd1d1f96be3a3f92128e9d78719a8d65939 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Mon, 8 Apr 2024 10:36:35 +0300
Subject: [PATCH] drm/panthor: clean up some types in panthor_sched_suspend()
These variables should be u32 instead of u64 because they're only
storing u32 values. Also static checkers complain when we do:
suspended_slots &= ~upd_ctx.timedout_mask;
In this code "suspended_slots" is a u64 and "upd_ctx.timedout_mask" is a u32. The
mask clears out the top 32 bits which would likely be a bug if anything
were stored there.
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/85356b15-4840-4e64-8c75-922cdd6a5fef@moroto.mountain
---
drivers/gpu/drm/panthor/panthor_sched.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -2546,7 +2546,7 @@ void panthor_sched_suspend(struct pantho
{
struct panthor_scheduler *sched = ptdev->scheduler;
struct panthor_csg_slots_upd_ctx upd_ctx;
- u64 suspended_slots, faulty_slots;
+ u32 suspended_slots, faulty_slots;
struct panthor_group *group;
u32 i;

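A worked example of the truncation the checkers flag; the values are made up for illustration:

u64 suspended_slots = 0x100000001ull; /* bits 0 and 32 set */
u32 timedout_mask = 0x1;

/* ~timedout_mask is the 32-bit value 0xfffffffe, which zero-extends to
 * 0x00000000fffffffe before the AND, so bit 32 is silently cleared
 * along with bit 0. With everything declared u32, no state can hide in
 * the upper half to begin with. */
suspended_slots &= ~timedout_mask; /* result: 0, not 0x100000000 */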
View File

@ -1,53 +0,0 @@
From be2d3e9d061552af6c50220ee7b7e76458a3080f Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 25 Apr 2024 12:39:20 +0200
Subject: [PATCH] drm/panthor: Kill the faulty_slots variable in
panthor_sched_suspend()
We can use upd_ctx.timedout_mask directly, and the faulty_slots update
in the flush_caches_failed situation is never used.
Suggested-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240425103920.826458-1-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_sched.c | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -2546,8 +2546,8 @@ void panthor_sched_suspend(struct pantho
{
struct panthor_scheduler *sched = ptdev->scheduler;
struct panthor_csg_slots_upd_ctx upd_ctx;
- u32 suspended_slots, faulty_slots;
struct panthor_group *group;
+ u32 suspended_slots;
u32 i;
mutex_lock(&sched->lock);
@@ -2566,10 +2566,9 @@ void panthor_sched_suspend(struct pantho
csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
suspended_slots &= ~upd_ctx.timedout_mask;
- faulty_slots = upd_ctx.timedout_mask;
- if (faulty_slots) {
- u32 slot_mask = faulty_slots;
+ if (upd_ctx.timedout_mask) {
+ u32 slot_mask = upd_ctx.timedout_mask;
drm_err(&ptdev->base, "CSG suspend failed, escalating to termination");
csgs_upd_ctx_init(&upd_ctx);
@@ -2620,9 +2619,6 @@ void panthor_sched_suspend(struct pantho
slot_mask &= ~BIT(csg_id);
}
-
- if (flush_caches_failed)
- faulty_slots |= suspended_slots;
}
for (i = 0; i < sched->csg_slot_count; i++) {

View File

@ -1,117 +0,0 @@
From 8bdbd8b5580b46c8cae365567f5bf6cc956e6512 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 2 May 2024 17:52:48 +0200
Subject: [PATCH] drm/panthor: Make sure we handle 'unknown group state' case
properly
When we check for state values returned by the FW, we only cover part of
the 0:7 range. Make sure we catch FW inconsistencies by adding a default
to the switch statement, and flagging the group state as unknown in that
case.
When an unknown state is detected, we trigger a reset, and consider the
group as unusable after that point, to prevent the potential corruption
from creeping in other places if we continue executing stuff on this
context.
v2:
- Add Steve's R-b
- Fix commit message
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/dri-devel/3b7fd2f2-679e-440c-81cd-42fc2573b515@moroto.mountain/T/#u
Suggested-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240502155248.1430582-1-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_sched.c | 37 +++++++++++++++++++++++--
1 file changed, 35 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -490,6 +490,18 @@ enum panthor_group_state {
* Can no longer be scheduled. The only allowed action is a destruction.
*/
PANTHOR_CS_GROUP_TERMINATED,
+
+ /**
+ * @PANTHOR_CS_GROUP_UNKNOWN_STATE: Group is an unknown state.
+ *
+ * The FW returned an inconsistent state. The group is flagged unusable
+ * and can no longer be scheduled. The only allowed action is a
+ * destruction.
+ *
+ * When that happens, we also schedule a FW reset, to start from a fresh
+ * state.
+ */
+ PANTHOR_CS_GROUP_UNKNOWN_STATE,
};
/**
@@ -1127,6 +1139,7 @@ csg_slot_sync_state_locked(struct pantho
struct panthor_fw_csg_iface *csg_iface;
struct panthor_group *group;
enum panthor_group_state new_state, old_state;
+ u32 csg_state;
lockdep_assert_held(&ptdev->scheduler->lock);
@@ -1137,7 +1150,8 @@ csg_slot_sync_state_locked(struct pantho
return;
old_state = group->state;
- switch (csg_iface->output->ack & CSG_STATE_MASK) {
+ csg_state = csg_iface->output->ack & CSG_STATE_MASK;
+ switch (csg_state) {
case CSG_STATE_START:
case CSG_STATE_RESUME:
new_state = PANTHOR_CS_GROUP_ACTIVE;
@@ -1148,11 +1162,28 @@ csg_slot_sync_state_locked(struct pantho
case CSG_STATE_SUSPEND:
new_state = PANTHOR_CS_GROUP_SUSPENDED;
break;
+ default:
+ /* The unknown state might be caused by a FW state corruption,
+ * which means the group metadata can't be trusted anymore, and
+ * the SUSPEND operation might propagate the corruption to the
+ * suspend buffers. Flag the group state as unknown to make
+ * sure it's unusable after that point.
+ */
+ drm_err(&ptdev->base, "Invalid state on CSG %d (state=%d)",
+ csg_id, csg_state);
+ new_state = PANTHOR_CS_GROUP_UNKNOWN_STATE;
+ break;
}
if (old_state == new_state)
return;
+ /* The unknown state might be caused by a FW issue, reset the FW to
+ * take a fresh start.
+ */
+ if (new_state == PANTHOR_CS_GROUP_UNKNOWN_STATE)
+ panthor_device_schedule_reset(ptdev);
+
if (new_state == PANTHOR_CS_GROUP_SUSPENDED)
csg_slot_sync_queues_state_locked(ptdev, csg_id);
@@ -1783,6 +1814,7 @@ static bool
group_can_run(struct panthor_group *group)
{
return group->state != PANTHOR_CS_GROUP_TERMINATED &&
+ group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE &&
!group->destroyed && group->fatal_queues == 0 &&
!group->timedout;
}
@@ -2557,7 +2589,8 @@ void panthor_sched_suspend(struct pantho
if (csg_slot->group) {
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
- CSG_STATE_SUSPEND,
+ group_can_run(csg_slot->group) ?
+ CSG_STATE_SUSPEND : CSG_STATE_TERMINATE,
CSG_STATE_MASK);
}
}

View File

@ -1,36 +0,0 @@
From 2fa42fd910c4ede1ae9c18d535b425046fa49351 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Tue, 30 Apr 2024 13:37:27 +0200
Subject: [PATCH] drm/panthor: Fix the FW reset logic
In the post_reset function, if the fast reset didn't succeed, we
are not clearing the fast_reset flag, which prevents firmware
sections from being reloaded. While at it, use panthor_fw_stop()
instead of manually writing DISABLE to the MCU_CONTROL register.
Fixes: 2718d91816ee ("drm/panthor: Add the FW logical block")
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240430113727.493155-1-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_fw.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -1083,10 +1083,11 @@ int panthor_fw_post_reset(struct panthor
if (!ret)
goto out;
- /* Force a disable, so we get a fresh boot on the next
- * panthor_fw_start() call.
+ /* Forcibly reset the MCU and force a slow reset, so we get a
+ * fresh boot on the next panthor_fw_start() call.
*/
- gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
+ panthor_fw_stop(ptdev);
+ ptdev->fw->fast_reset = false;
drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
}

View File

@ -1,82 +0,0 @@
From d2143297579f12ea22479d403d955819838e7e67 Mon Sep 17 00:00:00 2001
From: Antonino Maniscalco <antonino.maniscalco@collabora.com>
Date: Thu, 2 May 2024 18:51:54 +0200
Subject: [PATCH] drm/panthor: Fix tiler OOM handling to allow incremental
rendering
If the kernel couldn't allocate memory because we reached the maximum
number of chunks but no render passes are in flight
(panthor_heap_grow() returning -ENOMEM), we should defer the OOM
handling to the FW by returning a NULL chunk. The FW will then call
the tiler OOM exception handler, which is supposed to implement
incremental rendering (execute an intermediate fragment job to flush
the pending primitives, release the tiler memory that was used to
store those primitives, and start over from where it stopped).
Instead of checking for both ENOMEM and EBUSY, make panthor_heap_grow()
return ENOMEM no matter the reason for the allocation failure; the FW
doesn't care anyway.
v3:
- Add R-bs
v2:
- Make panthor_heap_grow() return -ENOMEM for all kind of allocation
failures
- Document the panthor_heap_grow() semantics
Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block")
Signed-off-by: Antonino Maniscalco <antonino.maniscalco@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240502165158.1458959-2-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_heap.c | 12 ++++++++----
drivers/gpu/drm/panthor/panthor_sched.c | 7 ++++++-
2 files changed, 14 insertions(+), 5 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_heap.c
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -410,6 +410,13 @@ out_unlock:
* @renderpasses_in_flight: Number of render passes currently in-flight.
* @pending_frag_count: Number of fragment jobs waiting for execution/completion.
* @new_chunk_gpu_va: Pointer used to return the chunk VA.
+ *
+ * Return:
+ * - 0 if a new heap was allocated
+ * - -ENOMEM if the tiler context reached the maximum number of chunks
+ * or if too many render passes are in-flight
+ * or if the allocation failed
+ * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
*/
int panthor_heap_grow(struct panthor_heap_pool *pool,
u64 heap_gpu_va,
@@ -439,10 +446,7 @@ int panthor_heap_grow(struct panthor_hea
* handler provided by the userspace driver, if any).
*/
if (renderpasses_in_flight > heap->target_in_flight ||
- (pending_frag_count > 0 && heap->chunk_count >= heap->max_chunks)) {
- ret = -EBUSY;
- goto out_unlock;
- } else if (heap->chunk_count >= heap->max_chunks) {
+ heap->chunk_count >= heap->max_chunks) {
ret = -ENOMEM;
goto out_unlock;
}
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -1385,7 +1385,12 @@ static int group_process_tiler_oom(struc
pending_frag_count, &new_chunk_va);
}
- if (ret && ret != -EBUSY) {
+ /* If the heap context doesn't have memory for us, we want to let the
+ * FW try to reclaim memory by waiting for fragment jobs to land or by
+ * executing the tiler OOM exception handler, which is supposed to
+ * implement incremental rendering.
+ */
+ if (ret && ret != -ENOMEM) {
drm_warn(&ptdev->base, "Failed to extend the tiler heap\n");
group->fatal_queues |= BIT(cs_id);
sched_queue_delayed_work(sched, tick, 0);

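In caller terms, the contract after this change: any failure of panthor_heap_grow() to hand out a chunk surfaces as -ENOMEM, and the scheduler then reports a NULL chunk (new_chunk_va left at 0) so the FW can fall back to incremental rendering. A condensed restatement of the hunk above, with the surrounding glue elided:

ret = panthor_heap_grow(pool, heap_gpu_va, renderpasses_in_flight,
			pending_frag_count, &new_chunk_va);

/* -ENOMEM: no chunk available right now; forward a NULL chunk and let
 * the FW's tiler OOM exception handler flush pending primitives with
 * an intermediate fragment job. Any other error is fatal. */
if (ret && ret != -ENOMEM) {
	drm_warn(&ptdev->base, "Failed to extend the tiler heap\n");
	group->fatal_queues |= BIT(cs_id);
}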
View File

@ -1,62 +0,0 @@
From e3193f0fbd6d83510ff6879ac248f42a7c0fefe7 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 2 May 2024 18:51:55 +0200
Subject: [PATCH] drm/panthor: Make sure the tiler initial/max chunks are
consistent
It doesn't make sense to have a maximum number of chunks smaller than
the initial number of chunks attached to the context.
Fix the uAPI header to reflect the new constraint, and mention the
undocumented "initial_chunk_count > 0" constraint while at it.
v3:
- Add R-b
v2:
- Fix the check
Fixes: 9cca48fa4f89 ("drm/panthor: Add the heap logical block")
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240502165158.1458959-3-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_heap.c | 3 +++
include/uapi/drm/panthor_drm.h | 8 ++++++--
2 files changed, 9 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_heap.c
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -281,6 +281,9 @@ int panthor_heap_create(struct panthor_h
if (initial_chunk_count == 0)
return -EINVAL;
+ if (initial_chunk_count > max_chunks)
+ return -EINVAL;
+
if (hweight32(chunk_size) != 1 ||
chunk_size < SZ_256K || chunk_size > SZ_2M)
return -EINVAL;
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -895,13 +895,17 @@ struct drm_panthor_tiler_heap_create {
/** @vm_id: VM ID the tiler heap should be mapped to */
__u32 vm_id;
- /** @initial_chunk_count: Initial number of chunks to allocate. */
+ /** @initial_chunk_count: Initial number of chunks to allocate. Must be at least one. */
__u32 initial_chunk_count;
/** @chunk_size: Chunk size. Must be a power of two at least 256KB large. */
__u32 chunk_size;
- /** @max_chunks: Maximum number of chunks that can be allocated. */
+ /**
+ * @max_chunks: Maximum number of chunks that can be allocated.
+ *
+ * Must be at least @initial_chunk_count.
+ */
__u32 max_chunks;
/**

View File

@ -1,76 +0,0 @@
From 69a429905ceccad547e4a532b08f9d32c7f3422a Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 2 May 2024 18:51:56 +0200
Subject: [PATCH] drm/panthor: Relax the constraints on the tiler chunk size
The field used to store the chunk size is 12 bits wide, and the encoding
is chunk_size = chunk_header.chunk_size << 12, which gives us a
theoretical [4k:8M] range. This range is further limited by
implementation constraints, and all known implementations seem to
impose a [128k:8M] range, so do the same here.
We also relax the power-of-two constraint, which doesn't seem to
exist on v10. This will allow userspace to fine-tune initial/max
tiler memory on memory-constrained devices.
v4:
- Actually fix the range in the kerneldoc
v3:
- Add R-bs
- Fix valid range in the kerneldoc
v2:
- Turn the power-of-two constraint into a page-aligned constraint to allow
fine-tune of the initial/max heap memory size
- Fix the panthor_heap_create() kerneldoc
Fixes: 9cca48fa4f89 ("drm/panthor: Add the heap logical block")
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240502165158.1458959-4-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_heap.c | 8 ++++----
include/uapi/drm/panthor_drm.h | 6 +++++-
2 files changed, 9 insertions(+), 5 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_heap.c
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -253,8 +253,8 @@ int panthor_heap_destroy(struct panthor_
* @pool: Pool to instantiate the heap context from.
* @initial_chunk_count: Number of chunk allocated at initialization time.
* Must be at least 1.
- * @chunk_size: The size of each chunk. Must be a power of two between 256k
- * and 2M.
+ * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
+ * [128k:8M] range.
* @max_chunks: Maximum number of chunks that can be allocated.
* @target_in_flight: Maximum number of in-flight render passes.
* @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
@@ -284,8 +284,8 @@ int panthor_heap_create(struct panthor_h
if (initial_chunk_count > max_chunks)
return -EINVAL;
- if (hweight32(chunk_size) != 1 ||
- chunk_size < SZ_256K || chunk_size > SZ_2M)
+ if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
+ chunk_size < SZ_128K || chunk_size > SZ_8M)
return -EINVAL;
down_read(&pool->lock);
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -898,7 +898,11 @@ struct drm_panthor_tiler_heap_create {
/** @initial_chunk_count: Initial number of chunks to allocate. Must be at least one. */
__u32 initial_chunk_count;
- /** @chunk_size: Chunk size. Must be a power of two at least 256KB large. */
+ /**
+ * @chunk_size: Chunk size.
+ *
+ * Must be page-aligned and lie in the [128k:8M] range.
+ */
__u32 chunk_size;
/**

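Combined with the previous patch, a tiler heap creation request now has to satisfy: initial_chunk_count >= 1, initial_chunk_count <= max_chunks, and a page-aligned chunk_size in the [128k:8M] range. A hedged ioctl sketch; the struct and ioctl names are from the panthor uAPI, drmIoctl() is the usual libdrm wrapper, and the concrete values (including target_in_flight) are arbitrary:

#include <xf86drm.h>
#include <drm/panthor_drm.h>

struct drm_panthor_tiler_heap_create req = {
	.vm_id = vm_id,             /* assumed: a previously created VM */
	.initial_chunk_count = 2,   /* must be >= 1 */
	.chunk_size = 192 * 1024,   /* page-aligned, in [128k:8M], no longer a power of two */
	.max_chunks = 16,           /* must be >= initial_chunk_count */
	.target_in_flight = 1,
};

if (drmIoctl(drm_fd, DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE, &req))
	/* errno is EINVAL for any violated constraint above */;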
View File

@ -1,53 +0,0 @@
From 8e43b1e537d4fb313efac1b5d0d01db0fe35f695 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 2 May 2024 18:51:57 +0200
Subject: [PATCH] drm/panthor: Fix an off-by-one in the heap context retrieval
logic
The heap ID is used to index the heap context pool, and allocating
in the [1:MAX_HEAPS_PER_POOL] leads to an off-by-one. This was
originally to avoid returning a zero heap handle, but given the handle
is formed with (vm_id << 16) | heap_id, with vm_id > 0, we already can't
end up with a valid heap handle that's zero.
v4:
- s/XA_FLAGS_ALLOC1/XA_FLAGS_ALLOC/
v3:
- Allocate in the [0:MAX_HEAPS_PER_POOL-1] range
v2:
- New patch
Fixes: 9cca48fa4f89 ("drm/panthor: Add the heap logical block")
Reported-by: Eric Smith <eric.smith@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Tested-by: Eric Smith <eric.smith@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240502165158.1458959-5-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_heap.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_heap.c
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -323,7 +323,8 @@ int panthor_heap_create(struct panthor_h
if (!pool->vm) {
ret = -EINVAL;
} else {
- ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(1, MAX_HEAPS_PER_POOL), GFP_KERNEL);
+ ret = xa_alloc(&pool->xa, &id, heap,
+ XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
if (!ret) {
void *gpu_ctx = panthor_get_heap_ctx(pool, id);
@@ -543,7 +544,7 @@ panthor_heap_pool_create(struct panthor_
pool->vm = vm;
pool->ptdev = ptdev;
init_rwsem(&pool->lock);
- xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1);
+ xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
kref_init(&pool->refcount);
pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,

View File
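Why heap ID 0 is safe to hand out, per the handle composition described in the commit message:

/* vm_id is always > 0, so even heap_id == 0 yields a nonzero handle.
 * The old [1:MAX_HEAPS_PER_POOL] allocation range wasted slot 0 and,
 * at the top end, produced an ID one past the gpu_contexts pool. */
u32 heap_handle = (vm_id << 16) | heap_id;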

@ -1,58 +0,0 @@
From 2b2a26b3314210585ca6d552a421921a3936713b Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 2 May 2024 20:38:09 +0200
Subject: [PATCH] drm/panthor: Force an immediate reset on unrecoverable faults
If the FW reports an unrecoverable fault, we need to reset the GPU
before we can start re-using it again.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240502183813.1612017-2-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_device.c | 1 +
drivers/gpu/drm/panthor/panthor_device.h | 1 +
drivers/gpu/drm/panthor/panthor_sched.c | 11 ++++++++++-
3 files changed, 12 insertions(+), 1 deletion(-)
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -293,6 +293,7 @@ static const struct panthor_exception_in
PANTHOR_EXCEPTION(ACTIVE),
PANTHOR_EXCEPTION(CS_RES_TERM),
PANTHOR_EXCEPTION(CS_CONFIG_FAULT),
+ PANTHOR_EXCEPTION(CS_UNRECOVERABLE),
PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT),
PANTHOR_EXCEPTION(CS_BUS_FAULT),
PANTHOR_EXCEPTION(CS_INSTR_INVALID),
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -216,6 +216,7 @@ enum drm_panthor_exception_type {
DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f,
DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f,
DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40,
+ DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41,
DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44,
DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48,
DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49,
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -1281,7 +1281,16 @@ cs_slot_process_fatal_event_locked(struc
if (group)
group->fatal_queues |= BIT(cs_id);
- sched_queue_delayed_work(sched, tick, 0);
+ if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) {
+ /* If this exception is unrecoverable, queue a reset, and make
+ * sure we stop scheduling groups until the reset has happened.
+ */
+ panthor_device_schedule_reset(ptdev);
+ cancel_delayed_work(&sched->tick_work);
+ } else {
+ sched_queue_delayed_work(sched, tick, 0);
+ }
+
drm_warn(&ptdev->base,
"CSG slot %d CS slot: %d\n"
"CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"

View File

@ -1,174 +0,0 @@
From ff60c8da0aaf7ecf5f4d48bebeb3c1f52b2088dd Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 2 May 2024 20:38:10 +0200
Subject: [PATCH] drm/panthor: Keep a ref to the VM at the panthor_kernel_bo
level
Avoids use-after-free situations when panthor_fw_unplug() is called
and the kernel BO was mapped to the FW VM.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240502183813.1612017-3-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_fw.c | 4 ++--
drivers/gpu/drm/panthor/panthor_gem.c | 8 +++++---
drivers/gpu/drm/panthor/panthor_gem.h | 8 ++++++--
drivers/gpu/drm/panthor/panthor_heap.c | 8 ++++----
drivers/gpu/drm/panthor/panthor_sched.c | 11 +++++------
5 files changed, 22 insertions(+), 17 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -453,7 +453,7 @@ panthor_fw_alloc_queue_iface_mem(struct
ret = panthor_kernel_bo_vmap(mem);
if (ret) {
- panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), mem);
+ panthor_kernel_bo_destroy(mem);
return ERR_PTR(ret);
}
@@ -1134,7 +1134,7 @@ void panthor_fw_unplug(struct panthor_de
panthor_fw_stop(ptdev);
list_for_each_entry(section, &ptdev->fw->sections, node)
- panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem);
+ panthor_kernel_bo_destroy(section->mem);
/* We intentionally don't call panthor_vm_idle() and let
* panthor_mmu_unplug() release the AS we acquired with
--- a/drivers/gpu/drm/panthor/panthor_gem.c
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -26,18 +26,18 @@ static void panthor_gem_free_object(stru
/**
* panthor_kernel_bo_destroy() - Destroy a kernel buffer object
- * @vm: The VM this BO was mapped to.
* @bo: Kernel buffer object to destroy. If NULL or an ERR_PTR(), the destruction
* is skipped.
*/
-void panthor_kernel_bo_destroy(struct panthor_vm *vm,
- struct panthor_kernel_bo *bo)
+void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo)
{
+ struct panthor_vm *vm;
int ret;
if (IS_ERR_OR_NULL(bo))
return;
+ vm = bo->vm;
panthor_kernel_bo_vunmap(bo);
if (drm_WARN_ON(bo->obj->dev,
@@ -53,6 +53,7 @@ void panthor_kernel_bo_destroy(struct pa
drm_gem_object_put(bo->obj);
out_free_bo:
+ panthor_vm_put(vm);
kfree(bo);
}
@@ -106,6 +107,7 @@ panthor_kernel_bo_create(struct panthor_
if (ret)
goto err_free_va;
+ kbo->vm = panthor_vm_get(vm);
bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm);
drm_gem_object_get(bo->exclusive_vm_root_gem);
bo->base.base.resv = bo->exclusive_vm_root_gem->resv;
--- a/drivers/gpu/drm/panthor/panthor_gem.h
+++ b/drivers/gpu/drm/panthor/panthor_gem.h
@@ -62,6 +62,11 @@ struct panthor_kernel_bo {
struct drm_gem_object *obj;
/**
+ * @vm: VM this private buffer is attached to.
+ */
+ struct panthor_vm *vm;
+
+ /**
* @va_node: VA space allocated to this GEM.
*/
struct drm_mm_node va_node;
@@ -136,7 +141,6 @@ panthor_kernel_bo_create(struct panthor_
size_t size, u32 bo_flags, u32 vm_map_flags,
u64 gpu_va);
-void panthor_kernel_bo_destroy(struct panthor_vm *vm,
- struct panthor_kernel_bo *bo);
+void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo);
#endif /* __PANTHOR_GEM_H__ */
--- a/drivers/gpu/drm/panthor/panthor_heap.c
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -127,7 +127,7 @@ static void panthor_free_heap_chunk(stru
heap->chunk_count--;
mutex_unlock(&heap->lock);
- panthor_kernel_bo_destroy(vm, chunk->bo);
+ panthor_kernel_bo_destroy(chunk->bo);
kfree(chunk);
}
@@ -183,7 +183,7 @@ static int panthor_alloc_heap_chunk(stru
return 0;
err_destroy_bo:
- panthor_kernel_bo_destroy(vm, chunk->bo);
+ panthor_kernel_bo_destroy(chunk->bo);
err_free_chunk:
kfree(chunk);
@@ -395,7 +395,7 @@ int panthor_heap_return_chunk(struct pan
mutex_unlock(&heap->lock);
if (removed) {
- panthor_kernel_bo_destroy(pool->vm, chunk->bo);
+ panthor_kernel_bo_destroy(chunk->bo);
kfree(chunk);
ret = 0;
} else {
@@ -595,7 +595,7 @@ void panthor_heap_pool_destroy(struct pa
drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));
if (!IS_ERR_OR_NULL(pool->gpu_contexts))
- panthor_kernel_bo_destroy(pool->vm, pool->gpu_contexts);
+ panthor_kernel_bo_destroy(pool->gpu_contexts);
/* Reflects the fact the pool has been destroyed. */
pool->vm = NULL;
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -826,8 +826,8 @@ static void group_free_queue(struct pant
panthor_queue_put_syncwait_obj(queue);
- panthor_kernel_bo_destroy(group->vm, queue->ringbuf);
- panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem);
+ panthor_kernel_bo_destroy(queue->ringbuf);
+ panthor_kernel_bo_destroy(queue->iface.mem);
kfree(queue);
}
@@ -837,15 +837,14 @@ static void group_release_work(struct wo
struct panthor_group *group = container_of(work,
struct panthor_group,
release_work);
- struct panthor_device *ptdev = group->ptdev;
u32 i;
for (i = 0; i < group->queue_count; i++)
group_free_queue(group, group->queues[i]);
- panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf);
- panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf);
- panthor_kernel_bo_destroy(group->vm, group->syncobjs);
+ panthor_kernel_bo_destroy(group->suspend_buf);
+ panthor_kernel_bo_destroy(group->protm_suspend_buf);
+ panthor_kernel_bo_destroy(group->syncobjs);
panthor_vm_put(group->vm);
kfree(group);

View File

@ -1,26 +0,0 @@
From a257e8182261da48b7c34615f2752f8a78ac108b Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 2 May 2024 20:38:11 +0200
Subject: [PATCH] drm/panthor: Reset the FW VM to NULL on unplug
This way we get NULL derefs instead of a use-after-free if the FW VM is
referenced after the device has been unplugged.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240502183813.1612017-4-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_fw.c | 1 +
1 file changed, 1 insertion(+)
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -1142,6 +1142,7 @@ void panthor_fw_unplug(struct panthor_de
* state to keep the active_refcnt balanced.
*/
panthor_vm_put(ptdev->fw->vm);
+ ptdev->fw->vm = NULL;
panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
}

View File

@ -1,90 +0,0 @@
From 3ce4322b1a3a40ca175b16fc54cf22b041ecfd4b Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 2 May 2024 20:38:12 +0200
Subject: [PATCH] drm/panthor: Call panthor_sched_post_reset() even if the
reset failed
We need to undo what was done in panthor_sched_pre_reset() even if the
reset failed. We just flag all previously running groups as terminated
when that happens to unblock things.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240502183813.1612017-5-boris.brezillon@collabora.com
---
drivers/gpu/drm/panthor/panthor_device.c | 7 +------
drivers/gpu/drm/panthor/panthor_sched.c | 19 ++++++++++++++-----
drivers/gpu/drm/panthor/panthor_sched.h | 2 +-
3 files changed, 16 insertions(+), 12 deletions(-)
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -129,13 +129,8 @@ static void panthor_device_reset_work(st
panthor_gpu_l2_power_on(ptdev);
panthor_mmu_post_reset(ptdev);
ret = panthor_fw_post_reset(ptdev);
- if (ret)
- goto out_dev_exit;
-
atomic_set(&ptdev->reset.pending, 0);
- panthor_sched_post_reset(ptdev);
-
-out_dev_exit:
+ panthor_sched_post_reset(ptdev, ret != 0);
drm_dev_exit(cookie);
if (ret) {
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -2733,15 +2733,22 @@ void panthor_sched_pre_reset(struct pant
mutex_unlock(&sched->reset.lock);
}
-void panthor_sched_post_reset(struct panthor_device *ptdev)
+void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed)
{
struct panthor_scheduler *sched = ptdev->scheduler;
struct panthor_group *group, *group_tmp;
mutex_lock(&sched->reset.lock);
- list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node)
+ list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) {
+ /* Consider all previously running group as terminated if the
+ * reset failed.
+ */
+ if (reset_failed)
+ group->state = PANTHOR_CS_GROUP_TERMINATED;
+
panthor_group_start(group);
+ }
/* We're done resetting the GPU, clear the reset.in_progress bit so we can
* kick the scheduler.
@@ -2749,9 +2756,11 @@ void panthor_sched_post_reset(struct pan
atomic_set(&sched->reset.in_progress, false);
mutex_unlock(&sched->reset.lock);
- sched_queue_delayed_work(sched, tick, 0);
-
- sched_queue_work(sched, sync_upd);
+ /* No need to queue a tick and update syncs if the reset failed. */
+ if (!reset_failed) {
+ sched_queue_delayed_work(sched, tick, 0);
+ sched_queue_work(sched, sync_upd);
+ }
}
static void group_sync_upd_work(struct work_struct *work)
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -40,7 +40,7 @@ void panthor_group_pool_destroy(struct p
int panthor_sched_init(struct panthor_device *ptdev);
void panthor_sched_unplug(struct panthor_device *ptdev);
void panthor_sched_pre_reset(struct panthor_device *ptdev);
-void panthor_sched_post_reset(struct panthor_device *ptdev);
+void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed);
void panthor_sched_suspend(struct panthor_device *ptdev);
void panthor_sched_resume(struct panthor_device *ptdev);

Some files were not shown because too many files have changed in this diff.