GitHub Repository: torvalds/linux
Path: blob/master/arch/riscv/kvm/vcpu_pmu.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023 Rivos Inc
 *
 * Authors:
 *     Atish Patra <[email protected]>
 */

#define pr_fmt(fmt) "riscv-kvm-pmu: " fmt
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/perf/riscv_pmu.h>
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>
#include <linux/bitops.h>

#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)

static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
	[SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
	[SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
	[SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
	[SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
	[SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	[SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
	[SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	[SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	[SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
};

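/*
 * perf counts up towards overflow, so the sample period is programmed as the
 * two's complement of the current guest counter value, truncated to the
 * counter width. A counter value of zero maps to the full counter range.
 */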
static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
{
	u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
	u64 sample_period;

	if (!pmc->counter_val)
		sample_period = counter_val_mask;
	else
		sample_period = (-pmc->counter_val) & counter_val_mask;

	return sample_period;
}

static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 type = PERF_TYPE_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		type = PERF_TYPE_HARDWARE;
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		type = PERF_TYPE_HW_CACHE;
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
	case SBI_PMU_EVENT_TYPE_RAW_V2:
	case SBI_PMU_EVENT_TYPE_FW:
		type = PERF_TYPE_RAW;
		break;
	default:
		break;
	}

	return type;
}

static bool kvm_pmu_is_fw_event(unsigned long eidx)
{
	return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
}

static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
{
	return hw_event_perf_map[sbi_event_code];
}

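/*
 * Translate an SBI cache event code into the perf PERF_TYPE_HW_CACHE config
 * encoding: cache id in bits [7:0], operation in bits [15:8] and result in
 * bits [23:16]. U64_MAX is returned if any field is out of range.
 */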
static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
{
	u64 config = U64_MAX;
	unsigned int cache_type, cache_op, cache_result;

	/* All the cache event masks lie within 0xFF. No separate masking is necessary */
	cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
		      SBI_PMU_EVENT_CACHE_ID_SHIFT;
	cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
		    SBI_PMU_EVENT_CACHE_OP_SHIFT;
	cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

	if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
	    cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return config;

	config = cache_type | (cache_op << 8) | (cache_result << 16);

	return config;
}

static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 ecode = get_event_code(eidx);
	u64 config = U64_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		if (ecode < SBI_PMU_HW_GENERAL_MAX)
			config = kvm_pmu_get_perf_event_hw_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		config = kvm_pmu_get_perf_event_cache_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
		config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
		break;
	case SBI_PMU_EVENT_TYPE_RAW_V2:
		config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK;
		break;
	case SBI_PMU_EVENT_TYPE_FW:
		if (ecode < SBI_PMU_FW_MAX)
			config = (1ULL << 63) | ecode;
		break;
	default:
		break;
	}

	return config;
}

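/*
 * Counter 0 (cycle) and counter 2 (instret) are fixed-purpose; index 1 is the
 * TIME CSR and is never exposed as a PMU counter. Every other hardware event
 * must be placed on a programmable counter.
 */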
static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
{
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	u32 ecode = get_event_code(eidx);

	if (etype != SBI_PMU_EVENT_TYPE_HW)
		return -EINVAL;

	if (ecode == SBI_PMU_HW_CPU_CYCLES)
		return 0;
	else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
		return 2;
	else
		return -EINVAL;
}

static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
					      unsigned long cbase, unsigned long cmask)
{
	int ctr_idx = -1;
	int i, pmc_idx;
	int min, max;

	if (kvm_pmu_is_fw_event(eidx)) {
		/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
		min = kvpmu->num_hw_ctrs;
		max = min + kvpmu->num_fw_ctrs;
	} else {
		/* First 3 counters are reserved for fixed counters */
		min = 3;
		max = kvpmu->num_hw_ctrs;
	}

	for_each_set_bit(i, &cmask, BITS_PER_LONG) {
		pmc_idx = i + cbase;
		if ((pmc_idx >= min && pmc_idx < max) &&
		    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
			ctr_idx = pmc_idx;
			break;
		}
	}

	return ctr_idx;
}

static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	int ret;

	/* Fixed counters need to have a fixed mapping as they have a different width */
	ret = kvm_pmu_get_fixed_pmc_index(eidx);
	if (ret >= 0)
		return ret;

	return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}

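/*
 * Handles the SBI firmware counter "read hi" path: on RV32 the upper 32 bits
 * of a firmware counter are returned here, while the lower half is read via
 * the regular read path below.
 */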
static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
			      unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int fevent_code;

	if (!IS_ENABLED(CONFIG_32BIT)) {
		pr_warn("%s: should only be invoked for RV32\n", __func__);
		return -EINVAL;
	}

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
		return -EINVAL;

	fevent_code = get_event_code(pmc->event_idx);
	pmc->counter_val = kvpmu->fw_event[fevent_code].value;

	*out_val = pmc->counter_val >> 32;

	return 0;
}

static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
			unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 enabled, running;
	int fevent_code;

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
		fevent_code = get_event_code(pmc->event_idx);
		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
	} else if (pmc->perf_event) {
		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
	} else {
		return -EINVAL;
	}
	*out_val = pmc->counter_val;

	return 0;
}

static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
					 unsigned long ctr_mask)
{
	/* Make sure we have a valid counter mask requested from the caller */
	if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
		return -EINVAL;

	return 0;
}

static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_vcpu *vcpu = pmc->vcpu;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
	u64 period;

	/*
	 * Stop the event counting by directly accessing the perf_event.
	 * Otherwise, this needs to be deferred via a workqueue.
	 * That would introduce skew in the counter value because the actual
	 * physical counter would start after returning from this function.
	 * It would be stopped again once the workqueue is scheduled.
	 */
	rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * The hw counter would start automatically when this function returns.
	 * Thus, the host may continue to interrupt and inject it to the guest
	 * even without the guest configuring the next event. Depending on the hardware,
	 * the host may have some sluggishness only if privilege mode filtering is not
	 * available. In an ideal world, where qemu is not the only capable hardware,
	 * this can be removed.
	 * FYI: ARM64 does it this way while x86 doesn't do anything like this.
	 * TODO: Should we keep it for RISC-V ?
	 */
	period = -(local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	set_bit(pmc->idx, kvpmu->pmc_overflown);
	kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);

	rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
				      unsigned long flags, unsigned long eidx,
				      unsigned long evtdata)
{
	struct perf_event *event;

	kvm_pmu_release_perf_event(pmc);
	attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
	if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
		/* TODO: Do we really want to clear the value in the hardware counter? */
		pmc->counter_val = 0;
	}

	/*
	 * Set the default sample_period for now. The guest specified value
	 * will be updated in the start call.
	 */
	attr->sample_period = kvm_pmu_get_sample_period(pmc);

	event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
		return PTR_ERR(event);
	}

	pmc->perf_event = event;
	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
		perf_event_enable(pmc->perf_event);

	return 0;
}

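/*
 * Called by KVM's SBI emulation when the guest triggers a firmware event.
 * The count only advances while the guest has started the corresponding
 * firmware counter.
 */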
int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_fw_event *fevent;

	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
		return -EINVAL;

	fevent = &kvpmu->fw_event[fid];
	if (fevent->started)
		fevent->value++;

	return 0;
}

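/*
 * Emulate guest reads of the cycle/instret/hpmcounter CSRs. The counter state
 * lives in the virtual PMU, so the trapped CSR number is translated into a
 * virtual counter index and read via pmu_ctr_read().
 */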
int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
				unsigned long *val, unsigned long new_val,
				unsigned long wr_mask)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;

	if (!kvpmu || !kvpmu->init_done) {
		/*
		 * In the absence of sscofpmf in the platform, the guest OS may use
		 * the legacy PMU driver to read cycle/instret. In that case,
		 * just return 0 to avoid any illegal trap. However, any other
		 * hpmcounter access should result in an illegal trap as they must
		 * be accessed through the SBI PMU only.
		 */
		if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
			*val = 0;
			return ret;
		} else {
			return KVM_INSN_ILLEGAL_TRAP;
		}
	}

	/* The counter CSRs are read-only. Thus, any write should result in an illegal trap */
	if (wr_mask)
		return KVM_INSN_ILLEGAL_TRAP;

	cidx = csr_num - CSR_CYCLE;

	if (pmu_ctr_read(vcpu, cidx, val) < 0)
		return KVM_INSN_ILLEGAL_TRAP;

	return ret;
}

static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	kfree(kvpmu->sdata);
	kvpmu->sdata = NULL;
	kvpmu->snapshot_addr = INVALID_GPA;
}

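/*
 * SBI PMU snapshot support: the guest registers a shared memory area
 * (struct riscv_pmu_snapshot_data) that KVM fills with counter values and the
 * overflow bitmap when counters are stopped. Passing SBI_SHMEM_DISABLE in
 * both address words tears the snapshot area down.
 */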
int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
					  unsigned long saddr_high, unsigned long flags,
					  struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
	int sbiret = 0;
	gpa_t saddr;

	if (!kvpmu || flags) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
		kvm_pmu_clear_snapshot_area(vcpu);
		return 0;
	}

	saddr = saddr_low;

	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT))
			saddr |= ((gpa_t)saddr_high << 32);
		else
			sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
	if (!kvpmu->sdata)
		return -ENOMEM;

	/* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */
	if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
		kfree(kvpmu->sdata);
		sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	kvpmu->snapshot_addr = saddr;

out:
	retdata->err_val = sbiret;

	return 0;
}

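/*
 * SBI PMU event info: the guest passes an array of riscv_pmu_event_info
 * entries in shared memory, and KVM reports in each entry's output field
 * whether the host perf framework can service that event.
 */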
int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
				  unsigned long saddr_high, unsigned long num_events,
				  unsigned long flags, struct kvm_vcpu_sbi_return *retdata)
{
	struct riscv_pmu_event_info *einfo = NULL;
	int shmem_size = num_events * sizeof(*einfo);
	gpa_t shmem;
	u32 eidx, etype;
	u64 econfig;
	int ret;

	if (flags != 0 || (saddr_low & (SZ_16 - 1) || num_events == 0)) {
		ret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	shmem = saddr_low;
	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT)) {
			shmem |= ((gpa_t)saddr_high << 32);
		} else {
			ret = SBI_ERR_INVALID_ADDRESS;
			goto out;
		}
	}

	einfo = kzalloc(shmem_size, GFP_KERNEL);
	if (!einfo)
		return -ENOMEM;

	ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size);
	if (ret) {
		ret = SBI_ERR_FAILURE;
		goto free_mem;
	}

	for (int i = 0; i < num_events; i++) {
		eidx = einfo[i].event_idx;
		etype = kvm_pmu_get_perf_event_type(eidx);
		econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data);
		ret = riscv_pmu_get_event_info(etype, econfig, NULL);
		einfo[i].output = (ret > 0) ? 1 : 0;
	}

	ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
	if (ret) {
		ret = SBI_ERR_INVALID_ADDRESS;
		goto free_mem;
	}

	ret = 0;
free_mem:
	kfree(einfo);
out:
	retdata->err_val = ret;

	return 0;
}

int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	retdata->out_val = kvm_pmu_num_counters(kvpmu);

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
		retdata->err_val = SBI_ERR_INVALID_PARAM;
		return 0;
	}

	retdata->out_val = kvpmu->pmc[cidx].cinfo.value;

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				 unsigned long ctr_mask, unsigned long flags, u64 ival,
				 struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set) {
		if (kvpmu->snapshot_addr == INVALID_GPA) {
			sbiret = SBI_ERR_NO_SHMEM;
			goto out;
		}
		if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					sizeof(struct riscv_pmu_snapshot_data))) {
			pr_warn("Unable to read snapshot shared memory while starting counters\n");
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
	}
	/* Start the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		/* The guest started the counter again. Reset the overflow status */
		clear_bit(pmc_index, kvpmu->pmc_overflown);
		pmc = &kvpmu->pmc[pmc_index];
		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
			pmc->counter_val = ival;
		} else if (snap_flag_set) {
			/* The counter indices in the snapshot are relative to the counter base */
			pmc->counter_val = kvpmu->sdata->ctr_values[i];
		}

		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			/* Check if the counter was already started for some reason */
			if (kvpmu->fw_event[fevent_code].started) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}

			kvpmu->fw_event[fevent_code].started = true;
			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
		} else if (pmc->perf_event) {
			if (unlikely(pmc->started)) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}
			perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
			perf_event_enable(pmc->perf_event);
			pmc->started = true;
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}
	}

out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				unsigned long ctr_mask, unsigned long flags,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	u64 enabled, running;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
	bool shmem_needs_update = false;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
		sbiret = SBI_ERR_NO_SHMEM;
		goto out;
	}

	/* Stop the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		pmc = &kvpmu->pmc[pmc_index];
		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			if (!kvpmu->fw_event[fevent_code].started)
				sbiret = SBI_ERR_ALREADY_STOPPED;

			kvpmu->fw_event[fevent_code].started = false;
		} else if (pmc->perf_event) {
			if (pmc->started) {
				/* Stop counting the counter */
				perf_event_disable(pmc->perf_event);
				pmc->started = false;
			} else {
				sbiret = SBI_ERR_ALREADY_STOPPED;
			}

			if (flags & SBI_PMU_STOP_FLAG_RESET)
				/* Release the counter if this is a reset request */
				kvm_pmu_release_perf_event(pmc);
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}

		if (snap_flag_set && !sbiret) {
			if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
				pmc->counter_val = kvpmu->fw_event[fevent_code].value;
			else if (pmc->perf_event)
				pmc->counter_val += perf_event_read_value(pmc->perf_event,
									  &enabled, &running);
			/*
			 * The counter and overflow indices in the snapshot region are
			 * relative to cbase. Use the set bit in the counter mask instead
			 * of pmc_index, which is the absolute counter index.
			 */
			if (test_bit(pmc_index, kvpmu->pmc_overflown))
				kvpmu->sdata->ctr_overflow_mask |= BIT(i);
			kvpmu->sdata->ctr_values[i] = pmc->counter_val;
			shmem_needs_update = true;
		}

		if (flags & SBI_PMU_STOP_FLAG_RESET) {
			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
			clear_bit(pmc_index, kvpmu->pmc_in_use);
			clear_bit(pmc_index, kvpmu->pmc_overflown);
			if (snap_flag_set) {
				/*
				 * Only clear the given counter as the caller is responsible
				 * for validating both the overflow mask and configured counters.
				 */
				kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
				shmem_needs_update = true;
			}
		}
	}

	if (shmem_needs_update)
		kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
				     sizeof(struct riscv_pmu_snapshot_data));

out:
	retdata->err_val = sbiret;

	return 0;
}

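/*
 * SBI PMU counter config matching: pick a counter for the requested event (or
 * validate the caller-selected one when SKIP_MATCH is set), create the backing
 * perf event for hardware events, and return the chosen counter index.
 */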
int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				     unsigned long ctr_mask, unsigned long flags,
				     unsigned long eidx, u64 evtdata,
				     struct kvm_vcpu_sbi_return *retdata)
{
	int ctr_idx, sbiret = 0;
	long ret;
	bool is_fevent;
	unsigned long event_code;
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	struct perf_event_attr attr = {
		.type = etype,
		.size = sizeof(struct perf_event_attr),
		.pinned = true,
		.disabled = true,
		/*
		 * We should never reach here if the platform doesn't support the
		 * sscofpmf extension, as mode filtering won't work without it.
		 */
		.exclude_host = true,
		.exclude_hv = true,
		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	event_code = get_event_code(eidx);
	is_fevent = kvm_pmu_is_fw_event(eidx);
	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
		sbiret = SBI_ERR_NOT_SUPPORTED;
		goto out;
	}

	/*
	 * The SKIP_MATCH flag indicates the caller is aware of the assigned counter
	 * for this event. Just do a sanity check that it is already marked as used.
	 */
	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
		if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
		ctr_idx = ctr_base + __ffs(ctr_mask);
	} else {
		ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
		if (ctr_idx < 0) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	pmc = &kvpmu->pmc[ctr_idx];
	pmc->idx = ctr_idx;

	if (is_fevent) {
		if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
			kvpmu->fw_event[event_code].started = true;
	} else {
		ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
		if (ret) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	set_bit(ctr_idx, kvpmu->pmc_in_use);
	pmc->event_idx = eidx;
	retdata->out_val = ctr_idx;
out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
				      struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
				   struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}

void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
{
	int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	/*
	 * PMU functionality should only be available to guests if privilege mode
	 * filtering is available in the host. Otherwise, the guest will always count
	 * events while execution is in hypervisor mode.
	 */
	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
		return;

	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
	if (ret < 0 || !hpm_width || !num_hw_ctrs)
		return;

	/*
	 * Increase the number of hardware counters to offset the time counter.
	 */
	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvpmu->snapshot_addr = INVALID_GPA;

	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
		pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
		kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
	}

	/*
	 * There is no correlation between the logical hardware counters and virtual counters.
	 * However, we need to encode an hpmcounter CSR in the counter info field so that
	 * KVM can trap and emulate the read. This works well in the migration use case as
	 * KVM doesn't care if the actual hpmcounter is available in the hardware or not.
	 */
	for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
		/* The TIME CSR shouldn't be read from the perf interface */
		if (i == 1)
			continue;
		pmc = &kvpmu->pmc[i];
		pmc->idx = i;
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
		pmc->vcpu = vcpu;
		if (i < kvpmu->num_hw_ctrs) {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
			if (i < 3)
				/* CY, IR counters */
				pmc->cinfo.width = 63;
			else
				pmc->cinfo.width = hpm_width;
			/*
			 * The CSR number doesn't have any relation with the logical
			 * hardware counters. The CSR numbers are encoded sequentially
			 * to avoid maintaining a map between the virtual counter
			 * and CSR number.
			 */
			pmc->cinfo.csr = CSR_CYCLE + i;
		} else {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
			pmc->cinfo.width = 63;
		}
	}

	kvpmu->init_done = true;
}

void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	if (!kvpmu)
		return;

	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
		pmc = &kvpmu->pmc[i];
		pmc->counter_val = 0;
		kvm_pmu_release_perf_event(pmc);
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
	}
	bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
	bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvm_pmu_clear_snapshot_area(vcpu);
}

void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}