From f7fe64ad0f22ff034f8ebcfbd7299ee9cc9b57d7 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 30 Oct 2023 20:24:37 -0700
Subject: [PATCH] drm/sched: Split free_job into own work item

Rather than calling free_job and run_job from the same work item, have a
dedicated work item for each. This aligns with the design and intended
use of work queues.

v2:
 - Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting
   timestamp in free_job() work item (Danilo)
v3:
 - Drop forward dec of drm_sched_select_entity (Boris)
 - Return in drm_sched_run_job_work if entity NULL (Boris)
v4:
 - Replace dequeue with peek and invert logic (Luben)
 - Wrap to 100 lines (Luben)
 - Update comments for *_queue / *_queue_if_ready functions (Luben)
v5:
 - Drop peek argument, blindly reinit idle (Luben)
 - s/drm_sched_free_job_queue_if_ready/drm_sched_free_job_queue_if_done (Luben)
 - Update work_run_job & work_free_job kernel doc (Luben)
v6:
 - Do not move drm_sched_select_entity in file (Luben)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20231031032439.1558703-4-matthew.brost@intel.com
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 146 +++++++++++++++++--------
 include/drm/gpu_scheduler.h            |   4 +-
 2 files changed, 101 insertions(+), 49 deletions(-)
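
A note on the shape of the change, for reviewers: each scheduler callback
now gets its own work item on the shared submit_wq, and each worker handles
a single job per invocation, re-arming itself (and its sibling) only when
more work is known to exist. Below is a minimal sketch of that pattern; it
is illustrative only and not part of the patch, and all toy_* names are
hypothetical stand-ins for the real drm_gpu_scheduler structures.

/*
 * Illustrative sketch only -- not part of the patch.  The "toy_*"
 * names are hypothetical stand-ins for struct drm_gpu_scheduler and
 * friends; they exist to show the one-work-item-per-callback pattern.
 */
#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct toy_job {
	struct list_head list;
	bool finished;
};

struct toy_sched {
	struct workqueue_struct *submit_wq;	/* shared by both work items */
	struct work_struct work_run_job;	/* submits one job per pass */
	struct work_struct work_free_job;	/* reaps one job per pass */
	struct list_head pending_list;
	spinlock_t job_list_lock;
};

static void toy_run_job_work(struct work_struct *w)
{
	/* Would pick the next ready entity and call run_job; elided. */
}

/* Reap at most one finished job, then conditionally re-arm. */
static void toy_free_job_work(struct work_struct *w)
{
	struct toy_sched *s = container_of(w, struct toy_sched, work_free_job);
	struct toy_job *job;

	spin_lock(&s->job_list_lock);
	job = list_first_entry_or_null(&s->pending_list, struct toy_job, list);
	if (job && job->finished)
		list_del(&job->list);
	else
		job = NULL;
	spin_unlock(&s->job_list_lock);

	if (job) {
		kfree(job);	/* stands in for the free_job callback */
		/* More finished jobs may be pending: re-queue ourselves. */
		queue_work(s->submit_wq, &s->work_free_job);
		/* Freeing may have opened a ring slot for submission. */
		queue_work(s->submit_wq, &s->work_run_job);
	}
}

static int toy_sched_init(struct toy_sched *s)
{
	/* An ordered wq serializes run_job against free_job. */
	s->submit_wq = alloc_ordered_workqueue("toy-submit", 0);
	if (!s->submit_wq)
		return -ENOMEM;
	INIT_LIST_HEAD(&s->pending_list);
	spin_lock_init(&s->job_list_lock);
	INIT_WORK(&s->work_run_job, toy_run_job_work);
	INIT_WORK(&s->work_free_job, toy_free_job_work);
	return 0;
}

Because both items are queued on the same submit_wq, an ordered workqueue
serializes run_job against free_job, while an ordinary workqueue lets the
two run concurrently.
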
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -266,6 +266,32 @@ static void drm_sched_run_job_queue(stru
 }
 
 /**
+ * drm_sched_free_job_queue - enqueue free-job work
+ * @sched: scheduler instance
+ */
+static void drm_sched_free_job_queue(struct drm_gpu_scheduler *sched)
+{
+	if (!READ_ONCE(sched->pause_submit))
+		queue_work(sched->submit_wq, &sched->work_free_job);
+}
+
+/**
+ * drm_sched_free_job_queue_if_done - enqueue free-job work if ready
+ * @sched: scheduler instance
+ */
+static void drm_sched_free_job_queue_if_done(struct drm_gpu_scheduler *sched)
+{
+	struct drm_sched_job *job;
+
+	spin_lock(&sched->job_list_lock);
+	job = list_first_entry_or_null(&sched->pending_list,
+				       struct drm_sched_job, list);
+	if (job && dma_fence_is_signaled(&job->s_fence->finished))
+		drm_sched_free_job_queue(sched);
+	spin_unlock(&sched->job_list_lock);
+}
+
+/**
  * drm_sched_job_done - complete a job
  * @s_job: pointer to the job which is done
  *
@@ -284,7 +310,7 @@ static void drm_sched_job_done(struct dr
 	dma_fence_get(&s_fence->finished);
 	drm_sched_fence_finished(s_fence, result);
 	dma_fence_put(&s_fence->finished);
-	drm_sched_run_job_queue(sched);
+	drm_sched_free_job_queue(sched);
 }
 
 /**
@@ -951,8 +977,10 @@ drm_sched_get_cleanup_job(struct drm_gpu
 						typeof(*next), list);
 
 		if (next) {
-			next->s_fence->scheduled.timestamp =
-				dma_fence_timestamp(&job->s_fence->finished);
+			if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
+				     &next->s_fence->scheduled.flags))
+				next->s_fence->scheduled.timestamp =
+					dma_fence_timestamp(&job->s_fence->finished);
 			/* start TO timer for next job */
 			drm_sched_start_timeout(sched);
 		}
@@ -1002,7 +1030,40 @@ drm_sched_pick_best(struct drm_gpu_sched
 EXPORT_SYMBOL(drm_sched_pick_best);
 
 /**
- * drm_sched_run_job_work - main scheduler thread
+ * drm_sched_run_job_queue_if_ready - enqueue run-job work if ready
+ * @sched: scheduler instance
+ */
+static void drm_sched_run_job_queue_if_ready(struct drm_gpu_scheduler *sched)
+{
+	if (drm_sched_select_entity(sched))
+		drm_sched_run_job_queue(sched);
+}
+
+/**
+ * drm_sched_free_job_work - worker to call free_job
+ *
+ * @w: free job work
+ */
+static void drm_sched_free_job_work(struct work_struct *w)
+{
+	struct drm_gpu_scheduler *sched =
+		container_of(w, struct drm_gpu_scheduler, work_free_job);
+	struct drm_sched_job *cleanup_job;
+
+	if (READ_ONCE(sched->pause_submit))
+		return;
+
+	cleanup_job = drm_sched_get_cleanup_job(sched);
+	if (cleanup_job) {
+		sched->ops->free_job(cleanup_job);
+
+		drm_sched_free_job_queue_if_done(sched);
+		drm_sched_run_job_queue_if_ready(sched);
+	}
+}
+
+/**
+ * drm_sched_run_job_work - worker to call run_job
  *
  * @w: run job work
  */
@@ -1011,65 +1072,51 @@ static void drm_sched_run_job_work(struc
 	struct drm_gpu_scheduler *sched =
 		container_of(w, struct drm_gpu_scheduler, work_run_job);
 	struct drm_sched_entity *entity;
-	struct drm_sched_job *cleanup_job;
+	struct dma_fence *fence;
+	struct drm_sched_fence *s_fence;
+	struct drm_sched_job *sched_job;
 	int r;
 
 	if (READ_ONCE(sched->pause_submit))
 		return;
 
-	cleanup_job = drm_sched_get_cleanup_job(sched);
 	entity = drm_sched_select_entity(sched);
+	if (!entity)
+		return;
 
-	if (!entity && !cleanup_job)
+	sched_job = drm_sched_entity_pop_job(entity);
+	if (!sched_job) {
+		complete_all(&entity->entity_idle);
 		return;	/* No more work */
+	}
 
-	if (cleanup_job)
-		sched->ops->free_job(cleanup_job);
-
-	if (entity) {
-		struct dma_fence *fence;
-		struct drm_sched_fence *s_fence;
-		struct drm_sched_job *sched_job;
-
-		sched_job = drm_sched_entity_pop_job(entity);
-		if (!sched_job) {
-			complete_all(&entity->entity_idle);
-			if (!cleanup_job)
-				return;	/* No more work */
-			goto again;
-		}
-
-		s_fence = sched_job->s_fence;
-
-		atomic_inc(&sched->hw_rq_count);
-		drm_sched_job_begin(sched_job);
+	s_fence = sched_job->s_fence;
 
-		trace_drm_run_job(sched_job, entity);
-		fence = sched->ops->run_job(sched_job);
-		complete_all(&entity->entity_idle);
-		drm_sched_fence_scheduled(s_fence, fence);
+	atomic_inc(&sched->hw_rq_count);
+	drm_sched_job_begin(sched_job);
 
-		if (!IS_ERR_OR_NULL(fence)) {
-			/* Drop for original kref_init of the fence */
-			dma_fence_put(fence);
+	trace_drm_run_job(sched_job, entity);
+	fence = sched->ops->run_job(sched_job);
+	complete_all(&entity->entity_idle);
+	drm_sched_fence_scheduled(s_fence, fence);
 
-			r = dma_fence_add_callback(fence, &sched_job->cb,
-						   drm_sched_job_done_cb);
-			if (r == -ENOENT)
-				drm_sched_job_done(sched_job, fence->error);
-			else if (r)
-				DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
-					  r);
-		} else {
-			drm_sched_job_done(sched_job, IS_ERR(fence) ?
-					   PTR_ERR(fence) : 0);
-		}
+	if (!IS_ERR_OR_NULL(fence)) {
+		/* Drop for original kref_init of the fence */
+		dma_fence_put(fence);
 
-		wake_up(&sched->job_scheduled);
+		r = dma_fence_add_callback(fence, &sched_job->cb,
+					   drm_sched_job_done_cb);
+		if (r == -ENOENT)
+			drm_sched_job_done(sched_job, fence->error);
+		else if (r)
+			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
+	} else {
+		drm_sched_job_done(sched_job, IS_ERR(fence) ?
+				   PTR_ERR(fence) : 0);
 	}
 
-again:
-	drm_sched_run_job_queue(sched);
+	wake_up(&sched->job_scheduled);
+	drm_sched_run_job_queue_if_ready(sched);
 }
 
 /**
@@ -1153,6 +1200,7 @@ int drm_sched_init(struct drm_gpu_schedu
 	atomic_set(&sched->hw_rq_count, 0);
 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
 	INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
+	INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);
 	atomic_set(&sched->_score, 0);
 	atomic64_set(&sched->job_id_count, 0);
 	sched->pause_submit = false;
@@ -1282,6 +1330,7 @@ void drm_sched_wqueue_stop(struct drm_gp
 {
 	WRITE_ONCE(sched->pause_submit, true);
 	cancel_work_sync(&sched->work_run_job);
+	cancel_work_sync(&sched->work_free_job);
 }
 EXPORT_SYMBOL(drm_sched_wqueue_stop);
 
@@ -1294,5 +1343,6 @@ void drm_sched_wqueue_start(struct drm_g
 {
 	WRITE_ONCE(sched->pause_submit, false);
 	queue_work(sched->submit_wq, &sched->work_run_job);
+	queue_work(sched->submit_wq, &sched->work_free_job);
 }
 EXPORT_SYMBOL(drm_sched_wqueue_start);
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -479,9 +479,10 @@ struct drm_sched_backend_ops {
  *                 finished.
  * @hw_rq_count: the number of jobs currently in the hardware queue.
  * @job_id_count: used to assign unique id to the each job.
- * @submit_wq: workqueue used to queue @work_run_job
+ * @submit_wq: workqueue used to queue @work_run_job and @work_free_job
  * @timeout_wq: workqueue used to queue @work_tdr
  * @work_run_job: work which calls run_job op of each scheduler.
+ * @work_free_job: work which calls free_job op of each scheduler.
  * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
  *            timeout interval is over.
  * @pending_list: the list of jobs which are currently in the job queue.
@@ -511,6 +512,7 @@ struct drm_gpu_scheduler {
 	struct workqueue_struct *submit_wq;
 	struct workqueue_struct *timeout_wq;
 	struct work_struct work_run_job;
+	struct work_struct work_free_job;
 	struct delayed_work work_tdr;
 	struct list_head pending_list;
 	spinlock_t job_list_lock;