GitHub Repository: torvalds/linux
Path: blob/master/arch/riscv/kvm/vcpu_pmu.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023 Rivos Inc
 *
 * Authors:
 *     Atish Patra <[email protected]>
 */

#define pr_fmt(fmt) "riscv-kvm-pmu: " fmt
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/perf/riscv_pmu.h>
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>
#include <linux/bitops.h>

#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)

static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
	[SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
	[SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
	[SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
	[SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
	[SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	[SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
	[SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	[SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	[SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
};

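/*
 * perf counts up towards overflow, so the sample period is programmed as the
 * two's complement of the current guest counter value, truncated to the
 * counter width. A counter value of zero maps to the full counter range.
 */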
static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
{
	u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
	u64 sample_period;

	if (!pmc->counter_val)
		sample_period = counter_val_mask;
	else
		sample_period = (-pmc->counter_val) & counter_val_mask;

	return sample_period;
}

static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 type = PERF_TYPE_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		type = PERF_TYPE_HARDWARE;
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		type = PERF_TYPE_HW_CACHE;
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
	case SBI_PMU_EVENT_TYPE_RAW_V2:
	case SBI_PMU_EVENT_TYPE_FW:
		type = PERF_TYPE_RAW;
		break;
	default:
		break;
	}

	return type;
}

static bool kvm_pmu_is_fw_event(unsigned long eidx)
{
	return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
}

static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
{
	return hw_event_perf_map[sbi_event_code];
}

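/*
 * Translate an SBI cache event code into the perf PERF_TYPE_HW_CACHE config
 * encoding: cache id in bits [7:0], operation in bits [15:8] and result in
 * bits [23:16]. U64_MAX is returned if any field is out of range.
 */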
static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
{
	u64 config = U64_MAX;
	unsigned int cache_type, cache_op, cache_result;

	/* All the cache event masks lie within 0xFF. No separate masking is necessary */
	cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
		      SBI_PMU_EVENT_CACHE_ID_SHIFT;
	cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
		    SBI_PMU_EVENT_CACHE_OP_SHIFT;
	cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

	if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
	    cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return config;

	config = cache_type | (cache_op << 8) | (cache_result << 16);

	return config;
}

static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 ecode = get_event_code(eidx);
	u64 config = U64_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		if (ecode < SBI_PMU_HW_GENERAL_MAX)
			config = kvm_pmu_get_perf_event_hw_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		config = kvm_pmu_get_perf_event_cache_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
		config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
		break;
	case SBI_PMU_EVENT_TYPE_RAW_V2:
		config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK;
		break;
	case SBI_PMU_EVENT_TYPE_FW:
		if (ecode < SBI_PMU_FW_MAX)
			config = (1ULL << 63) | ecode;
		break;
	default:
		break;
	}

	return config;
}

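/*
 * Counter 0 (cycle) and counter 2 (instret) are fixed-purpose; index 1 is the
 * TIME CSR and is never exposed as a PMU counter. Every other hardware event
 * must be placed on a programmable counter.
 */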
static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
{
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	u32 ecode = get_event_code(eidx);

	if (etype != SBI_PMU_EVENT_TYPE_HW)
		return -EINVAL;

	if (ecode == SBI_PMU_HW_CPU_CYCLES)
		return 0;
	else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
		return 2;
	else
		return -EINVAL;
}

static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
					      unsigned long cbase, unsigned long cmask)
{
	int ctr_idx = -1;
	int i, pmc_idx;
	int min, max;

	if (kvm_pmu_is_fw_event(eidx)) {
		/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
		min = kvpmu->num_hw_ctrs;
		max = min + kvpmu->num_fw_ctrs;
	} else {
		/* First 3 counters are reserved for fixed counters */
		min = 3;
		max = kvpmu->num_hw_ctrs;
	}

	for_each_set_bit(i, &cmask, BITS_PER_LONG) {
		pmc_idx = i + cbase;
		if ((pmc_idx >= min && pmc_idx < max) &&
		    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
			ctr_idx = pmc_idx;
			break;
		}
	}

	return ctr_idx;
}

static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	int ret;

	/* Fixed counters need to have a fixed mapping as they have a different width */
	ret = kvm_pmu_get_fixed_pmc_index(eidx);
	if (ret >= 0)
		return ret;

	return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}

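/*
 * Handles the SBI firmware counter "read hi" path: on RV32 the upper 32 bits
 * of a firmware counter are returned here, while the lower half is read via
 * the regular read path below.
 */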
static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
			      unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int fevent_code;

	if (!IS_ENABLED(CONFIG_32BIT)) {
		pr_warn("%s: should only be invoked for RV32\n", __func__);
		return -EINVAL;
	}

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
		return -EINVAL;

	fevent_code = get_event_code(pmc->event_idx);
	pmc->counter_val = kvpmu->fw_event[fevent_code].value;

	*out_val = pmc->counter_val >> 32;

	return 0;
}

static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
			unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 enabled, running;
	int fevent_code;

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
		fevent_code = get_event_code(pmc->event_idx);
		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
	} else if (pmc->perf_event) {
		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
	} else {
		return -EINVAL;
	}
	*out_val = pmc->counter_val;

	return 0;
}

static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
					 unsigned long ctr_mask)
{
	/* Make sure we have a valid counter mask requested from the caller */
	if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
		return -EINVAL;

	return 0;
}

static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_vcpu *vcpu = pmc->vcpu;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
	u64 period;

	/*
	 * Stop the event counting by directly accessing the perf_event.
	 * Otherwise, this needs to be deferred via a workqueue.
	 * That would introduce skew in the counter value because the actual
	 * physical counter would start after returning from this function.
	 * It would be stopped again once the workqueue is scheduled.
	 */
	rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * The hw counter would start automatically when this function returns.
	 * Thus, the host may continue to interrupt and inject it to the guest
	 * even without the guest configuring the next event. Depending on the hardware,
	 * the host may have some sluggishness only if privilege mode filtering is not
	 * available. In an ideal world, where qemu is not the only capable hardware,
	 * this can be removed.
	 * FYI: ARM64 does it this way while x86 doesn't do anything like this.
	 * TODO: Should we keep it for RISC-V ?
	 */
	period = -(local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	set_bit(pmc->idx, kvpmu->pmc_overflown);
	kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);

	rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
				      unsigned long flags, unsigned long eidx,
				      unsigned long evtdata)
{
	struct perf_event *event;

	kvm_pmu_release_perf_event(pmc);
	attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
	if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
		/* TODO: Do we really want to clear the value in the hardware counter? */
		pmc->counter_val = 0;
	}

	/*
	 * Set the default sample_period for now. The guest specified value
	 * will be updated in the start call.
	 */
	attr->sample_period = kvm_pmu_get_sample_period(pmc);

	event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
		return PTR_ERR(event);
	}

	pmc->perf_event = event;
	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
		perf_event_enable(pmc->perf_event);

	return 0;
}

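/*
 * Called by KVM's SBI emulation when the guest triggers a firmware event.
 * The count only advances while the guest has started the corresponding
 * firmware counter.
 */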
int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_fw_event *fevent;

	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
		return -EINVAL;

	fevent = &kvpmu->fw_event[fid];
	if (fevent->started)
		fevent->value++;

	return 0;
}

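/*
 * Emulate guest reads of the cycle/instret/hpmcounter CSRs. The counter state
 * lives in the virtual PMU, so the trapped CSR number is translated into a
 * virtual counter index and read via pmu_ctr_read().
 */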
int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
				unsigned long *val, unsigned long new_val,
				unsigned long wr_mask)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;

	if (!kvpmu || !kvpmu->init_done) {
		/*
		 * In the absence of sscofpmf in the platform, the guest OS may use
		 * the legacy PMU driver to read cycle/instret. In that case,
		 * just return 0 to avoid any illegal trap. However, any other
		 * hpmcounter access should result in an illegal trap as they must
		 * be accessed through the SBI PMU only.
		 */
		if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
			*val = 0;
			return ret;
		} else {
			return KVM_INSN_ILLEGAL_TRAP;
		}
	}

	/* The counter CSRs are read-only. Thus, any write should result in an illegal trap */
	if (wr_mask)
		return KVM_INSN_ILLEGAL_TRAP;

	cidx = csr_num - CSR_CYCLE;

	if (pmu_ctr_read(vcpu, cidx, val) < 0)
		return KVM_INSN_ILLEGAL_TRAP;

	return ret;
}

static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	kfree(kvpmu->sdata);
	kvpmu->sdata = NULL;
	kvpmu->snapshot_addr = INVALID_GPA;
}

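/*
 * SBI PMU snapshot support: the guest registers a shared memory area
 * (struct riscv_pmu_snapshot_data) that KVM fills with counter values and the
 * overflow bitmap when counters are stopped. Passing SBI_SHMEM_DISABLE in
 * both address words tears the snapshot area down.
 */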
int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
					  unsigned long saddr_high, unsigned long flags,
					  struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
	int sbiret = 0;
	gpa_t saddr;

	if (!kvpmu || flags) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
		kvm_pmu_clear_snapshot_area(vcpu);
		return 0;
	}

	saddr = saddr_low;

	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT))
			saddr |= ((gpa_t)saddr_high << 32);
		else
			sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
	if (!kvpmu->sdata)
		return -ENOMEM;

	/* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */
	if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
		kfree(kvpmu->sdata);
		sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	kvpmu->snapshot_addr = saddr;

out:
	retdata->err_val = sbiret;

	return 0;
}

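/*
 * SBI PMU event info: the guest passes an array of riscv_pmu_event_info
 * entries in shared memory, and KVM reports in each entry's output field
 * whether the host perf framework can service that event.
 */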
int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
				  unsigned long saddr_high, unsigned long num_events,
				  unsigned long flags, struct kvm_vcpu_sbi_return *retdata)
{
	struct riscv_pmu_event_info *einfo = NULL;
	int shmem_size = num_events * sizeof(*einfo);
	gpa_t shmem;
	u32 eidx, etype;
	u64 econfig;
	int ret;

	if (flags != 0 || (saddr_low & (SZ_16 - 1) || num_events == 0)) {
		ret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	shmem = saddr_low;
	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT)) {
			shmem |= ((gpa_t)saddr_high << 32);
		} else {
			ret = SBI_ERR_INVALID_ADDRESS;
			goto out;
		}
	}

	einfo = kzalloc(shmem_size, GFP_KERNEL);
	if (!einfo)
		return -ENOMEM;

	ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size);
	if (ret) {
		ret = SBI_ERR_FAILURE;
		goto free_mem;
	}

	for (int i = 0; i < num_events; i++) {
		eidx = einfo[i].event_idx;
		etype = kvm_pmu_get_perf_event_type(eidx);
		econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data);
		ret = riscv_pmu_get_event_info(etype, econfig, NULL);
		einfo[i].output = (ret > 0) ? 1 : 0;
	}

	ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
	if (ret) {
		ret = SBI_ERR_INVALID_ADDRESS;
		goto free_mem;
	}

	ret = 0;
free_mem:
	kfree(einfo);
out:
	retdata->err_val = ret;

	return 0;
}

int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	retdata->out_val = kvm_pmu_num_counters(kvpmu);

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
		retdata->err_val = SBI_ERR_INVALID_PARAM;
		return 0;
	}

	retdata->out_val = kvpmu->pmc[cidx].cinfo.value;

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				 unsigned long ctr_mask, unsigned long flags, u64 ival,
				 struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set) {
		if (kvpmu->snapshot_addr == INVALID_GPA) {
			sbiret = SBI_ERR_NO_SHMEM;
			goto out;
		}
		if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					sizeof(struct riscv_pmu_snapshot_data))) {
			pr_warn("Unable to read snapshot shared memory while starting counters\n");
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
	}
	/* Start the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		/* The guest started the counter again. Reset the overflow status */
		clear_bit(pmc_index, kvpmu->pmc_overflown);
		pmc = &kvpmu->pmc[pmc_index];
		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
			pmc->counter_val = ival;
		} else if (snap_flag_set) {
			/* The counter indices in the snapshot are relative to the counter base */
			pmc->counter_val = kvpmu->sdata->ctr_values[i];
		}

		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			/* Check if the counter was already started for some reason */
			if (kvpmu->fw_event[fevent_code].started) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}

			kvpmu->fw_event[fevent_code].started = true;
			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
		} else if (pmc->perf_event) {
			if (unlikely(pmc->started)) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}
			perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
			perf_event_enable(pmc->perf_event);
			pmc->started = true;
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}
	}

out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				unsigned long ctr_mask, unsigned long flags,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	u64 enabled, running;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
	bool shmem_needs_update = false;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
		sbiret = SBI_ERR_NO_SHMEM;
		goto out;
	}

	/* Stop the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		pmc = &kvpmu->pmc[pmc_index];
		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			if (!kvpmu->fw_event[fevent_code].started)
				sbiret = SBI_ERR_ALREADY_STOPPED;

			kvpmu->fw_event[fevent_code].started = false;
		} else if (pmc->perf_event) {
			if (pmc->started) {
				/* Stop counting the counter */
				perf_event_disable(pmc->perf_event);
				pmc->started = false;
			} else {
				sbiret = SBI_ERR_ALREADY_STOPPED;
			}

			if (flags & SBI_PMU_STOP_FLAG_RESET)
				/* Release the counter if this is a reset request */
				kvm_pmu_release_perf_event(pmc);
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}

		if (snap_flag_set && !sbiret) {
			if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
				pmc->counter_val = kvpmu->fw_event[fevent_code].value;
			else if (pmc->perf_event)
				pmc->counter_val += perf_event_read_value(pmc->perf_event,
									  &enabled, &running);
			/*
			 * The counter and overflow indices in the snapshot region are
			 * relative to cbase. Use the set bit in the counter mask instead
			 * of pmc_index, which is the absolute counter index.
			 */
			if (test_bit(pmc_index, kvpmu->pmc_overflown))
				kvpmu->sdata->ctr_overflow_mask |= BIT(i);
			kvpmu->sdata->ctr_values[i] = pmc->counter_val;
			shmem_needs_update = true;
		}

		if (flags & SBI_PMU_STOP_FLAG_RESET) {
			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
			clear_bit(pmc_index, kvpmu->pmc_in_use);
			clear_bit(pmc_index, kvpmu->pmc_overflown);
			if (snap_flag_set) {
				/*
				 * Only clear the given counter as the caller is responsible
				 * for validating both the overflow mask and configured counters.
				 */
				kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
				shmem_needs_update = true;
			}
		}
	}

	if (shmem_needs_update)
		kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
				     sizeof(struct riscv_pmu_snapshot_data));

out:
	retdata->err_val = sbiret;

	return 0;
}

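/*
 * SBI PMU counter config matching: pick a counter for the requested event (or
 * validate the caller-selected one when SKIP_MATCH is set), create the backing
 * perf event for hardware events, and return the chosen counter index.
 */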
int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				     unsigned long ctr_mask, unsigned long flags,
				     unsigned long eidx, u64 evtdata,
				     struct kvm_vcpu_sbi_return *retdata)
{
	int ctr_idx, sbiret = 0;
	long ret;
	bool is_fevent;
	unsigned long event_code;
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	struct perf_event_attr attr = {
		.type = etype,
		.size = sizeof(struct perf_event_attr),
		.pinned = true,
		.disabled = true,
		/*
		 * We should never reach here if the platform doesn't support the
		 * sscofpmf extension, as mode filtering won't work without it.
		 */
		.exclude_host = true,
		.exclude_hv = true,
		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	event_code = get_event_code(eidx);
	is_fevent = kvm_pmu_is_fw_event(eidx);
	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
		sbiret = SBI_ERR_NOT_SUPPORTED;
		goto out;
	}

	/*
	 * The SKIP_MATCH flag indicates the caller is aware of the assigned counter
	 * for this event. Just do a sanity check that it is already marked as used.
	 */
	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
		if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
		ctr_idx = ctr_base + __ffs(ctr_mask);
	} else {
		ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
		if (ctr_idx < 0) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	pmc = &kvpmu->pmc[ctr_idx];
	pmc->idx = ctr_idx;

	if (is_fevent) {
		if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
			kvpmu->fw_event[event_code].started = true;
	} else {
		ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
		if (ret) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	set_bit(ctr_idx, kvpmu->pmc_in_use);
	pmc->event_idx = eidx;
	retdata->out_val = ctr_idx;
out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
				      struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
				   struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}

void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
{
	int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	/*
	 * PMU functionality should only be available to guests if privilege mode
	 * filtering is available in the host. Otherwise, the guest will always count
	 * events while execution is in hypervisor mode.
	 */
	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
		return;

	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
	if (ret < 0 || !hpm_width || !num_hw_ctrs)
		return;

	/*
	 * Increase the number of hardware counters to offset the time counter.
	 */
	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvpmu->snapshot_addr = INVALID_GPA;

	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
		pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
		kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
	}

	/*
	 * There is no correlation between the logical hardware counters and virtual counters.
	 * However, we need to encode an hpmcounter CSR in the counter info field so that
	 * KVM can trap and emulate the read. This works well in the migration use case as
	 * KVM doesn't care if the actual hpmcounter is available in the hardware or not.
	 */
	for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
		/* The TIME CSR shouldn't be read from the perf interface */
		if (i == 1)
			continue;
		pmc = &kvpmu->pmc[i];
		pmc->idx = i;
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
		pmc->vcpu = vcpu;
		if (i < kvpmu->num_hw_ctrs) {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
			if (i < 3)
				/* CY, IR counters */
				pmc->cinfo.width = 63;
			else
				pmc->cinfo.width = hpm_width;
			/*
			 * The CSR number doesn't have any relation with the logical
			 * hardware counters. The CSR numbers are encoded sequentially
			 * to avoid maintaining a map between the virtual counter
			 * and CSR number.
			 */
			pmc->cinfo.csr = CSR_CYCLE + i;
		} else {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
			pmc->cinfo.width = 63;
		}
	}

	kvpmu->init_done = true;
}

void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	if (!kvpmu)
		return;

	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
		pmc = &kvpmu->pmc[i];
		pmc->counter_val = 0;
		kvm_pmu_release_perf_event(pmc);
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
	}
	bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
	bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvm_pmu_clear_snapshot_area(vcpu);
}

void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}