Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/accel/ethosu/ethosu_job.c
122915 views
1
// SPDX-License-Identifier: GPL-2.0-only OR MIT
2
/* Copyright 2024-2025 Tomeu Vizoso <[email protected]> */
3
/* Copyright 2025 Arm, Ltd. */
4
5
#include <linux/bitfield.h>
#include <linux/genalloc.h>
#include <linux/interrupt.h>
#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_dma_helper.h>
#include <drm/drm_print.h>
#include <drm/ethosu_accel.h>

#include "ethosu_device.h"
#include "ethosu_drv.h"
#include "ethosu_gem.h"
#include "ethosu_job.h"
22
23
#define JOB_TIMEOUT_MS 500
24
25
/* Map an embedded drm_sched_job back to its containing ethosu_job. */
static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct ethosu_job, base);
}
29
30
/* dma_fence_ops callback: driver name reported for ethosu fences. */
static const char *ethosu_fence_get_driver_name(struct dma_fence *fence)
{
	return "ethosu";
}
34
35
/* dma_fence_ops callback: single timeline name for the one NPU queue. */
static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence)
{
	return "ethosu-npu";
}
39
40
/* Minimal fence ops; the dma_fence default wait/release paths are used. */
static const struct dma_fence_ops ethosu_fence_ops = {
	.get_driver_name = ethosu_fence_get_driver_name,
	.get_timeline_name = ethosu_fence_get_timeline_name,
};
44
45
/*
 * Program the NPU registers for a validated job and start execution.
 *
 * Writes each region BO's DMA address into the matching BASEP register
 * pair, optionally points the reserved SRAM region at the device SRAM,
 * programs the command-stream queue base/size, and finally transitions
 * the core to the RUN state.
 *
 * Called from the scheduler run_job path with dev->job_lock held.
 */
static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job)
{
	struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo);
	struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(job->cmd_bo)->info;

	/* Point each used base-pointer region at its backing BO. */
	for (int i = 0; i < job->region_cnt; i++) {
		struct drm_gem_dma_object *bo;
		int region = job->region_bo_num[i];

		bo = to_drm_gem_dma_obj(job->region_bo[i]);
		writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region));
		writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region));
		dev_dbg(dev->base.dev, "Region %d base addr = %pad\n", region, &bo->dma_addr);
	}

	/* The reserved SRAM region is backed by device SRAM when requested. */
	if (job->sram_size) {
		writel_relaxed(lower_32_bits(dev->sramphys),
			       dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION));
		writel_relaxed(upper_32_bits(dev->sramphys),
			       dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION));
		dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n",
			ETHOSU_SRAM_REGION, &dev->sramphys);
	}

	/* Command stream queue location and size. */
	writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE);
	writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI);
	writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE);

	/* Non-relaxed write orders all setup above before starting the core. */
	writel(CMD_TRANSITION_TO_RUN, dev->regs + NPU_REG_CMD);

	dev_dbg(dev->base.dev,
		"Submitted cmd at %pad to core\n", &cmd_bo->dma_addr);
}
78
79
static int ethosu_acquire_object_fences(struct ethosu_job *job)
80
{
81
int i, ret;
82
struct drm_gem_object **bos = job->region_bo;
83
struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;
84
85
for (i = 0; i < job->region_cnt; i++) {
86
bool is_write;
87
88
if (!bos[i])
89
break;
90
91
ret = dma_resv_reserve_fences(bos[i]->resv, 1);
92
if (ret)
93
return ret;
94
95
is_write = info->output_region[job->region_bo_num[i]];
96
ret = drm_sched_job_add_implicit_dependencies(&job->base, bos[i],
97
is_write);
98
if (ret)
99
return ret;
100
}
101
102
return 0;
103
}
104
105
static void ethosu_attach_object_fences(struct ethosu_job *job)
106
{
107
int i;
108
struct dma_fence *fence = job->inference_done_fence;
109
struct drm_gem_object **bos = job->region_bo;
110
struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;
111
112
for (i = 0; i < job->region_cnt; i++)
113
if (info->output_region[job->region_bo_num[i]])
114
dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
115
}
116
117
/*
 * Queue a prepared job on the scheduler.
 *
 * Locks all region BO reservations, records implicit-sync dependencies,
 * takes a runtime PM reference (dropped in ethosu_job_cleanup()), then
 * arms and pushes the scheduler job.  The extra kref taken here is
 * released by ethosu_job_free() once the scheduler is done with the job.
 *
 * Return: 0 on success or a negative errno (job not armed on failure).
 */
static int ethosu_job_push(struct ethosu_job *job)
{
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	ret = drm_gem_lock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
	if (ret)
		return ret;

	ret = ethosu_acquire_object_fences(job);
	if (ret)
		goto out;

	ret = pm_runtime_resume_and_get(job->dev->base.dev);
	if (!ret) {
		/* sched_lock serialises arm/push so seqnos stay ordered. */
		guard(mutex)(&job->dev->sched_lock);

		drm_sched_job_arm(&job->base);
		job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);
		kref_get(&job->refcount); /* put by scheduler job completion */
		drm_sched_entity_push_job(&job->base);
		/* Fences must be attached while reservations are still held. */
		ethosu_attach_object_fences(job);
	}

out:
	drm_gem_unlock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
	return ret;
}
145
146
static void ethosu_job_err_cleanup(struct ethosu_job *job)
147
{
148
unsigned int i;
149
150
for (i = 0; i < job->region_cnt; i++)
151
drm_gem_object_put(job->region_bo[i]);
152
153
drm_gem_object_put(job->cmd_bo);
154
155
kfree(job);
156
}
157
158
/*
 * Final kref release: drop the runtime PM reference taken in
 * ethosu_job_push(), release both fences and free the job.
 */
static void ethosu_job_cleanup(struct kref *ref)
{
	struct ethosu_job *job = container_of(ref, struct ethosu_job,
					      refcount);

	pm_runtime_put_autosuspend(job->dev->base.dev);

	dma_fence_put(job->done_fence);
	dma_fence_put(job->inference_done_fence);

	ethosu_job_err_cleanup(job);
}
170
171
/* Drop a job reference; frees the job via ethosu_job_cleanup() at zero. */
static void ethosu_job_put(struct ethosu_job *job)
{
	kref_put(&job->refcount, ethosu_job_cleanup);
}
175
176
/* drm_sched free_job callback: drop the scheduler's reference on the job. */
static void ethosu_job_free(struct drm_sched_job *sched_job)
{
	struct ethosu_job *job = to_ethosu_job(sched_job);

	drm_sched_job_cleanup(sched_job);
	ethosu_job_put(job);
}
183
184
/*
 * drm_sched run_job callback: initialise the job's done fence and submit
 * the job to the hardware.
 *
 * Return: a reference to the done fence, or NULL if a dependency already
 * failed and the job must not run.
 */
static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job)
{
	struct ethosu_job *job = to_ethosu_job(sched_job);
	struct ethosu_device *dev = job->dev;
	struct dma_fence *fence = job->done_fence;

	/* Skip jobs whose dependencies signalled an error. */
	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	dma_fence_init(fence, &ethosu_fence_ops, &dev->fence_lock,
		       dev->fence_context, ++dev->emit_seqno);
	/* Reference handed back to the scheduler; the job keeps its own. */
	dma_fence_get(fence);

	/* job_lock keeps in_flight_job coherent with the IRQ thread. */
	scoped_guard(mutex, &dev->job_lock) {
		dev->in_flight_job = job;
		ethosu_job_hw_submit(dev, job);
	}

	return fence;
}
204
205
/*
 * Threaded IRQ bottom half: report errors or complete the in-flight job.
 *
 * Bus or command-parse errors are forwarded to the scheduler as a fault;
 * otherwise the in-flight job's done fence is signalled.
 */
static void ethosu_job_handle_irq(struct ethosu_device *dev)
{
	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);

	if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) {
		dev_err(dev->base.dev, "Error IRQ - %x\n", status);
		drm_sched_fault(&dev->sched);
		return;
	}

	scoped_guard(mutex, &dev->job_lock) {
		/* The job may already have been dropped by the timeout path. */
		if (dev->in_flight_job) {
			dma_fence_signal(dev->in_flight_job->done_fence);
			dev->in_flight_job = NULL;
		}
	}
}
222
223
static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data)
224
{
225
struct ethosu_device *dev = data;
226
227
ethosu_job_handle_irq(dev);
228
229
return IRQ_HANDLED;
230
}
231
232
/* Hard IRQ handler: acknowledge our interrupt and wake the thread. */
static irqreturn_t ethosu_job_irq_handler(int irq, void *data)
{
	struct ethosu_device *dev = data;
	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);

	/* Shared IRQ line: bail out if this device did not raise it. */
	if (!(status & STATUS_IRQ_RAISED))
		return IRQ_NONE;

	writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD);
	return IRQ_WAKE_THREAD;
}
243
244
/*
 * drm_sched timeout handler.
 *
 * If the NPU is still consuming the command stream (QREAD advances within
 * 100ms) the timeout is ignored.  Otherwise the scheduler is stopped, the
 * device is reset via a runtime-PM force suspend/resume cycle, and the
 * scheduler is restarted.
 */
static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad)
{
	struct ethosu_job *job = to_ethosu_job(bad);
	struct ethosu_device *dev = job->dev;
	bool running;
	u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr;
	u32 cmdaddr;

	cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD);
	running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS));

	if (running) {
		int ret;
		u32 reg;

		/* Poll QREAD for up to 100ms looking for forward progress. */
		ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD,
						 reg,
						 reg != cmdaddr,
						 USEC_PER_MSEC, 100 * USEC_PER_MSEC);

		/* If still running and progress is being made, just return */
		if (!ret)
			return DRM_GPU_SCHED_STAT_NO_HANG;
	}

	/* NOTE(review): indexing assumes QREAD is a byte offset into the cmd BO — confirm. */
	dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n",
		running ? "running" : "stopped",
		cmdaddr, bocmds[cmdaddr / 4]);

	drm_sched_stop(&dev->sched, bad);

	/* Forget the hung job so the IRQ thread won't signal it post-reset. */
	scoped_guard(mutex, &dev->job_lock)
		dev->in_flight_job = NULL;

	/* Proceed with reset now. */
	pm_runtime_force_suspend(dev->base.dev);
	pm_runtime_force_resume(dev->base.dev);

	/* Restart the scheduler */
	drm_sched_start(&dev->sched, 0);

	return DRM_GPU_SCHED_STAT_RESET;
}
287
288
/* Scheduler backend callbacks for the single NPU queue. */
static const struct drm_sched_backend_ops ethosu_sched_ops = {
	.run_job = ethosu_job_run,
	.timedout_job = ethosu_job_timedout,
	.free_job = ethosu_job_free
};
293
294
int ethosu_job_init(struct ethosu_device *edev)
295
{
296
struct device *dev = edev->base.dev;
297
struct drm_sched_init_args args = {
298
.ops = &ethosu_sched_ops,
299
.num_rqs = DRM_SCHED_PRIORITY_COUNT,
300
.credit_limit = 1,
301
.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
302
.name = dev_name(dev),
303
.dev = dev,
304
};
305
int ret;
306
307
spin_lock_init(&edev->fence_lock);
308
ret = devm_mutex_init(dev, &edev->job_lock);
309
if (ret)
310
return ret;
311
ret = devm_mutex_init(dev, &edev->sched_lock);
312
if (ret)
313
return ret;
314
315
edev->irq = platform_get_irq(to_platform_device(dev), 0);
316
if (edev->irq < 0)
317
return edev->irq;
318
319
ret = devm_request_threaded_irq(dev, edev->irq,
320
ethosu_job_irq_handler,
321
ethosu_job_irq_handler_thread,
322
IRQF_SHARED, KBUILD_MODNAME,
323
edev);
324
if (ret) {
325
dev_err(dev, "failed to request irq\n");
326
return ret;
327
}
328
329
edev->fence_context = dma_fence_context_alloc(1);
330
331
ret = drm_sched_init(&edev->sched, &args);
332
if (ret) {
333
dev_err(dev, "Failed to create scheduler: %d\n", ret);
334
goto err_sched;
335
}
336
337
return 0;
338
339
err_sched:
340
drm_sched_fini(&edev->sched);
341
return ret;
342
}
343
344
/* Tear down the device's job scheduler; pairs with ethosu_job_init(). */
void ethosu_job_fini(struct ethosu_device *dev)
{
	drm_sched_fini(&dev->sched);
}
348
349
int ethosu_job_open(struct ethosu_file_priv *ethosu_priv)
350
{
351
struct ethosu_device *dev = ethosu_priv->edev;
352
struct drm_gpu_scheduler *sched = &dev->sched;
353
int ret;
354
355
ret = drm_sched_entity_init(&ethosu_priv->sched_entity,
356
DRM_SCHED_PRIORITY_NORMAL,
357
&sched, 1, NULL);
358
return WARN_ON(ret);
359
}
360
361
/* Per-file close: destroy the file's scheduler entity. */
void ethosu_job_close(struct ethosu_file_priv *ethosu_priv)
{
	struct drm_sched_entity *entity = &ethosu_priv->sched_entity;

	drm_sched_entity_destroy(entity);
}
367
368
static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
369
struct drm_ethosu_job *job)
370
{
371
struct ethosu_device *edev = to_ethosu_device(dev);
372
struct ethosu_file_priv *file_priv = file->driver_priv;
373
struct ethosu_job *ejob = NULL;
374
struct ethosu_validated_cmdstream_info *cmd_info;
375
int ret = 0;
376
377
/* BO region 2 is reserved if SRAM is used */
378
if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size)
379
return -EINVAL;
380
381
if (edev->npu_info.sram_size < job->sram_size)
382
return -EINVAL;
383
384
ejob = kzalloc_obj(*ejob);
385
if (!ejob)
386
return -ENOMEM;
387
388
kref_init(&ejob->refcount);
389
390
ejob->dev = edev;
391
ejob->sram_size = job->sram_size;
392
393
ejob->done_fence = kzalloc_obj(*ejob->done_fence);
394
if (!ejob->done_fence) {
395
ret = -ENOMEM;
396
goto out_cleanup_job;
397
}
398
399
ret = drm_sched_job_init(&ejob->base,
400
&file_priv->sched_entity,
401
1, NULL, file->client_id);
402
if (ret)
403
goto out_put_job;
404
405
ejob->cmd_bo = drm_gem_object_lookup(file, job->cmd_bo);
406
if (!ejob->cmd_bo) {
407
ret = -ENOENT;
408
goto out_cleanup_job;
409
}
410
cmd_info = to_ethosu_bo(ejob->cmd_bo)->info;
411
if (!cmd_info) {
412
ret = -EINVAL;
413
goto out_cleanup_job;
414
}
415
416
for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) {
417
struct drm_gem_object *gem;
418
419
/* Can only omit a BO handle if the region is not used or used for SRAM */
420
if (!job->region_bo_handles[i] &&
421
(!cmd_info->region_size[i] || (i == ETHOSU_SRAM_REGION && job->sram_size)))
422
continue;
423
424
if (job->region_bo_handles[i] && !cmd_info->region_size[i]) {
425
dev_err(dev->dev,
426
"Cmdstream BO handle %d set for unused region %d\n",
427
job->region_bo_handles[i], i);
428
ret = -EINVAL;
429
goto out_cleanup_job;
430
}
431
432
gem = drm_gem_object_lookup(file, job->region_bo_handles[i]);
433
if (!gem) {
434
dev_err(dev->dev,
435
"Invalid BO handle %d for region %d\n",
436
job->region_bo_handles[i], i);
437
ret = -ENOENT;
438
goto out_cleanup_job;
439
}
440
441
ejob->region_bo[ejob->region_cnt] = gem;
442
ejob->region_bo_num[ejob->region_cnt] = i;
443
ejob->region_cnt++;
444
445
if (to_ethosu_bo(gem)->info) {
446
dev_err(dev->dev,
447
"Cmdstream BO handle %d used for region %d\n",
448
job->region_bo_handles[i], i);
449
ret = -EINVAL;
450
goto out_cleanup_job;
451
}
452
453
/* Verify the command stream doesn't have accesses outside the BO */
454
if (cmd_info->region_size[i] > gem->size) {
455
dev_err(dev->dev,
456
"cmd stream region %d size greater than BO size (%llu > %zu)\n",
457
i, cmd_info->region_size[i], gem->size);
458
ret = -EOVERFLOW;
459
goto out_cleanup_job;
460
}
461
}
462
ret = ethosu_job_push(ejob);
463
if (!ret) {
464
ethosu_job_put(ejob);
465
return 0;
466
}
467
468
out_cleanup_job:
469
if (ret)
470
drm_sched_job_cleanup(&ejob->base);
471
out_put_job:
472
ethosu_job_err_cleanup(ejob);
473
474
return ret;
475
}
476
477
int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
478
{
479
struct drm_ethosu_submit *args = data;
480
int ret = 0;
481
unsigned int i = 0;
482
483
if (args->pad) {
484
drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n");
485
return -EINVAL;
486
}
487
488
struct drm_ethosu_job __free(kvfree) *jobs =
489
kvmalloc_objs(*jobs, args->job_count);
490
if (!jobs)
491
return -ENOMEM;
492
493
if (copy_from_user(jobs,
494
(void __user *)(uintptr_t)args->jobs,
495
args->job_count * sizeof(*jobs))) {
496
drm_dbg(dev, "Failed to copy incoming job array\n");
497
return -EFAULT;
498
}
499
500
for (i = 0; i < args->job_count; i++) {
501
ret = ethosu_ioctl_submit_job(dev, file, &jobs[i]);
502
if (ret)
503
return ret;
504
}
505
506
return 0;
507
}
508
509