Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/cxl/core/pci.c
29536 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
3
#include <linux/units.h>
4
#include <linux/io-64-nonatomic-lo-hi.h>
5
#include <linux/device.h>
6
#include <linux/delay.h>
7
#include <linux/pci.h>
8
#include <linux/pci-doe.h>
9
#include <linux/aer.h>
10
#include <cxlpci.h>
11
#include <cxlmem.h>
12
#include <cxl.h>
13
#include "core.h"
14
#include "trace.h"
15
16
/**
17
* DOC: cxl core pci
18
*
19
* Compute Express Link protocols are layered on top of PCIe. CXL core provides
20
* a set of helpers for CXL interactions which occur via PCIe.
21
*/
22
23
static unsigned short media_ready_timeout = 60;
24
module_param(media_ready_timeout, ushort, 0644);
25
MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready");
26
27
static int pci_get_port_num(struct pci_dev *pdev)
28
{
29
u32 lnkcap;
30
int type;
31
32
type = pci_pcie_type(pdev);
33
if (type != PCI_EXP_TYPE_DOWNSTREAM && type != PCI_EXP_TYPE_ROOT_PORT)
34
return -EINVAL;
35
36
if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
37
&lnkcap))
38
return -ENXIO;
39
40
return FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
41
}
42
43
/**
44
* __devm_cxl_add_dport_by_dev - allocate a dport by dport device
45
* @port: cxl_port that hosts the dport
46
* @dport_dev: 'struct device' of the dport
47
*
48
* Returns the allocated dport on success or ERR_PTR() of -errno on error
49
*/
50
struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port,
51
struct device *dport_dev)
52
{
53
struct cxl_register_map map;
54
struct pci_dev *pdev;
55
int port_num, rc;
56
57
if (!dev_is_pci(dport_dev))
58
return ERR_PTR(-EINVAL);
59
60
pdev = to_pci_dev(dport_dev);
61
port_num = pci_get_port_num(pdev);
62
if (port_num < 0)
63
return ERR_PTR(port_num);
64
65
rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
66
if (rc)
67
return ERR_PTR(rc);
68
69
device_lock_assert(&port->dev);
70
return devm_cxl_add_dport(port, dport_dev, port_num, map.resource);
71
}
72
EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL");
73
74
/**
 * struct cxl_walk_context - state shared with the pci_walk_bus() callbacks
 * @bus: only devices sitting directly on this bus are considered
 * @port: cxl_port that matching devices are registered against as dports
 * @type: PCIe port type a device must have to be considered
 * @error: errno recorded when registering a dport fails
 * @count: number of devices that matched (and, when adding, were registered)
 */
struct cxl_walk_context {
	struct pci_bus *bus;
	struct cxl_port *port;
	int type;
	int error;
	int count;
};
81
82
static int match_add_dports(struct pci_dev *pdev, void *data)
83
{
84
struct cxl_walk_context *ctx = data;
85
struct cxl_port *port = ctx->port;
86
int type = pci_pcie_type(pdev);
87
struct cxl_register_map map;
88
struct cxl_dport *dport;
89
u32 lnkcap, port_num;
90
int rc;
91
92
if (pdev->bus != ctx->bus)
93
return 0;
94
if (!pci_is_pcie(pdev))
95
return 0;
96
if (type != ctx->type)
97
return 0;
98
if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
99
&lnkcap))
100
return 0;
101
102
rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
103
if (rc)
104
dev_dbg(&port->dev, "failed to find component registers\n");
105
106
port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
107
dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource);
108
if (IS_ERR(dport)) {
109
ctx->error = PTR_ERR(dport);
110
return PTR_ERR(dport);
111
}
112
ctx->count++;
113
114
return 0;
115
}
116
117
/**
118
* devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
119
* @port: cxl_port whose ->uport_dev is the upstream of dports to be enumerated
120
*
121
* Returns a positive number of dports enumerated or a negative error
122
* code.
123
*/
124
int devm_cxl_port_enumerate_dports(struct cxl_port *port)
125
{
126
struct pci_bus *bus = cxl_port_to_pci_bus(port);
127
struct cxl_walk_context ctx;
128
int type;
129
130
if (!bus)
131
return -ENXIO;
132
133
if (pci_is_root_bus(bus))
134
type = PCI_EXP_TYPE_ROOT_PORT;
135
else
136
type = PCI_EXP_TYPE_DOWNSTREAM;
137
138
ctx = (struct cxl_walk_context) {
139
.port = port,
140
.bus = bus,
141
.type = type,
142
};
143
pci_walk_bus(bus, match_add_dports, &ctx);
144
145
if (ctx.count == 0)
146
return -ENODEV;
147
if (ctx.error)
148
return ctx.error;
149
return ctx.count;
150
}
151
EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, "CXL");
152
153
static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
154
{
155
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
156
int d = cxlds->cxl_dvsec;
157
bool valid = false;
158
int rc, i;
159
u32 temp;
160
161
if (id > CXL_DVSEC_RANGE_MAX)
162
return -EINVAL;
163
164
/* Check MEM INFO VALID bit first, give up after 1s */
165
i = 1;
166
do {
167
rc = pci_read_config_dword(pdev,
168
d + CXL_DVSEC_RANGE_SIZE_LOW(id),
169
&temp);
170
if (rc)
171
return rc;
172
173
valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp);
174
if (valid)
175
break;
176
msleep(1000);
177
} while (i--);
178
179
if (!valid) {
180
dev_err(&pdev->dev,
181
"Timeout awaiting memory range %d valid after 1s.\n",
182
id);
183
return -ETIMEDOUT;
184
}
185
186
return 0;
187
}
188
189
static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id)
190
{
191
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
192
int d = cxlds->cxl_dvsec;
193
bool active = false;
194
int rc, i;
195
u32 temp;
196
197
if (id > CXL_DVSEC_RANGE_MAX)
198
return -EINVAL;
199
200
/* Check MEM ACTIVE bit, up to 60s timeout by default */
201
for (i = media_ready_timeout; i; i--) {
202
rc = pci_read_config_dword(
203
pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp);
204
if (rc)
205
return rc;
206
207
active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
208
if (active)
209
break;
210
msleep(1000);
211
}
212
213
if (!active) {
214
dev_err(&pdev->dev,
215
"timeout awaiting memory active after %d seconds\n",
216
media_ready_timeout);
217
return -ETIMEDOUT;
218
}
219
220
return 0;
221
}
222
223
/*
224
* Wait up to @media_ready_timeout for the device to report memory
225
* active.
226
*/
227
int cxl_await_media_ready(struct cxl_dev_state *cxlds)
228
{
229
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
230
int d = cxlds->cxl_dvsec;
231
int rc, i, hdm_count;
232
u64 md_status;
233
u16 cap;
234
235
rc = pci_read_config_word(pdev,
236
d + CXL_DVSEC_CAP_OFFSET, &cap);
237
if (rc)
238
return rc;
239
240
hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
241
for (i = 0; i < hdm_count; i++) {
242
rc = cxl_dvsec_mem_range_valid(cxlds, i);
243
if (rc)
244
return rc;
245
}
246
247
for (i = 0; i < hdm_count; i++) {
248
rc = cxl_dvsec_mem_range_active(cxlds, i);
249
if (rc)
250
return rc;
251
}
252
253
md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
254
if (!CXLMDEV_READY(md_status))
255
return -EIO;
256
257
return 0;
258
}
259
EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, "CXL");
260
261
/*
 * Set the Mem_Enable bit of the DVSEC control register to @val, which must
 * be either 0 or CXL_DVSEC_MEM_ENABLE.
 *
 * Returns 1 when the bit already had the requested value (nothing written),
 * 0 after a write, or a negative value reported by the config accessors.
 */
static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int ctrl_off = cxlds->cxl_dvsec + CXL_DVSEC_CTRL_OFFSET;
	u16 cur, next;
	int err;

	err = pci_read_config_word(pdev, ctrl_off, &cur);
	if (err < 0)
		return err;

	if ((cur & CXL_DVSEC_MEM_ENABLE) == val)
		return 1;

	next = (cur & ~CXL_DVSEC_MEM_ENABLE) | val;

	err = pci_write_config_word(pdev, ctrl_off, next);

	return err < 0 ? err : 0;
}
283
284
/* devm action: clear Mem_Enable again; the result is deliberately ignored */
static void clear_mem_enable(void *cxlds)
{
	struct cxl_dev_state *state = cxlds;

	cxl_set_mem_enable(state, 0);
}
288
289
static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
290
{
291
int rc;
292
293
rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE);
294
if (rc < 0)
295
return rc;
296
if (rc > 0)
297
return 0;
298
return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
299
}
300
301
/* require dvsec ranges to be covered by a locked platform window */
302
static int dvsec_range_allowed(struct device *dev, const void *arg)
303
{
304
const struct range *dev_range = arg;
305
struct cxl_decoder *cxld;
306
307
if (!is_root_decoder(dev))
308
return 0;
309
310
cxld = to_cxl_decoder(dev);
311
312
if (!(cxld->flags & CXL_DECODER_F_RAM))
313
return 0;
314
315
return range_contains(&cxld->hpa_range, dev_range);
316
}
317
318
static void disable_hdm(void *_cxlhdm)
319
{
320
u32 global_ctrl;
321
struct cxl_hdm *cxlhdm = _cxlhdm;
322
void __iomem *hdm = cxlhdm->regs.hdm_decoder;
323
324
global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
325
writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE,
326
hdm + CXL_HDM_DECODER_CTRL_OFFSET);
327
}
328
329
static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
330
{
331
void __iomem *hdm = cxlhdm->regs.hdm_decoder;
332
u32 global_ctrl;
333
334
global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
335
writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
336
hdm + CXL_HDM_DECODER_CTRL_OFFSET);
337
338
return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
339
}
340
341
int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
342
struct cxl_endpoint_dvsec_info *info)
343
{
344
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
345
struct device *dev = cxlds->dev;
346
int hdm_count, rc, i, ranges = 0;
347
int d = cxlds->cxl_dvsec;
348
u16 cap, ctrl;
349
350
if (!d) {
351
dev_dbg(dev, "No DVSEC Capability\n");
352
return -ENXIO;
353
}
354
355
rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
356
if (rc)
357
return rc;
358
359
if (!(cap & CXL_DVSEC_MEM_CAPABLE)) {
360
dev_dbg(dev, "Not MEM Capable\n");
361
return -ENXIO;
362
}
363
364
/*
365
* It is not allowed by spec for MEM.capable to be set and have 0 legacy
366
* HDM decoders (values > 2 are also undefined as of CXL 2.0). As this
367
* driver is for a spec defined class code which must be CXL.mem
368
* capable, there is no point in continuing to enable CXL.mem.
369
*/
370
hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
371
if (!hdm_count || hdm_count > 2)
372
return -EINVAL;
373
374
/*
375
* The current DVSEC values are moot if the memory capability is
376
* disabled, and they will remain moot after the HDM Decoder
377
* capability is enabled.
378
*/
379
rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
380
if (rc)
381
return rc;
382
383
info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
384
if (!info->mem_enabled)
385
return 0;
386
387
for (i = 0; i < hdm_count; i++) {
388
u64 base, size;
389
u32 temp;
390
391
rc = cxl_dvsec_mem_range_valid(cxlds, i);
392
if (rc)
393
return rc;
394
395
rc = pci_read_config_dword(
396
pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
397
if (rc)
398
return rc;
399
400
size = (u64)temp << 32;
401
402
rc = pci_read_config_dword(
403
pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
404
if (rc)
405
return rc;
406
407
size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
408
if (!size) {
409
continue;
410
}
411
412
rc = pci_read_config_dword(
413
pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
414
if (rc)
415
return rc;
416
417
base = (u64)temp << 32;
418
419
rc = pci_read_config_dword(
420
pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
421
if (rc)
422
return rc;
423
424
base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
425
426
info->dvsec_range[ranges++] = (struct range) {
427
.start = base,
428
.end = base + size - 1
429
};
430
}
431
432
info->ranges = ranges;
433
434
return 0;
435
}
436
EXPORT_SYMBOL_NS_GPL(cxl_dvsec_rr_decode, "CXL");
437
438
/**
439
* cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
440
* @cxlds: Device state
441
* @cxlhdm: Mapped HDM decoder Capability
442
* @info: Cached DVSEC range registers info
443
*
444
* Try to enable the endpoint's HDM Decoder Capability
445
*/
446
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
447
struct cxl_endpoint_dvsec_info *info)
448
{
449
void __iomem *hdm = cxlhdm->regs.hdm_decoder;
450
struct cxl_port *port = cxlhdm->port;
451
struct device *dev = cxlds->dev;
452
struct cxl_port *root;
453
int i, rc, allowed;
454
u32 global_ctrl = 0;
455
456
if (hdm)
457
global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
458
459
/*
460
* If the HDM Decoder Capability is already enabled then assume
461
* that some other agent like platform firmware set it up.
462
*/
463
if (global_ctrl & CXL_HDM_DECODER_ENABLE || (!hdm && info->mem_enabled))
464
return devm_cxl_enable_mem(&port->dev, cxlds);
465
466
/*
467
* If the HDM Decoder Capability does not exist and DVSEC was
468
* not setup, the DVSEC based emulation cannot be used.
469
*/
470
if (!hdm)
471
return -ENODEV;
472
473
/* The HDM Decoder Capability exists but is globally disabled. */
474
475
/*
476
* If the DVSEC CXL Range registers are not enabled, just
477
* enable and use the HDM Decoder Capability registers.
478
*/
479
if (!info->mem_enabled) {
480
rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
481
if (rc)
482
return rc;
483
484
return devm_cxl_enable_mem(&port->dev, cxlds);
485
}
486
487
/*
488
* Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base
489
* [High,Low] when HDM operation is enabled the range register values
490
* are ignored by the device, but the spec also recommends matching the
491
* DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
492
* are expected even though Linux does not require or maintain that
493
* match. Check if at least one DVSEC range is enabled and allowed by
494
* the platform. That is, the DVSEC range must be covered by a locked
495
* platform window (CFMWS). Fail otherwise as the endpoint's decoders
496
* cannot be used.
497
*/
498
499
root = to_cxl_port(port->dev.parent);
500
while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
501
root = to_cxl_port(root->dev.parent);
502
if (!is_cxl_root(root)) {
503
dev_err(dev, "Failed to acquire root port for HDM enable\n");
504
return -ENODEV;
505
}
506
507
for (i = 0, allowed = 0; i < info->ranges; i++) {
508
struct device *cxld_dev;
509
510
cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i],
511
dvsec_range_allowed);
512
if (!cxld_dev) {
513
dev_dbg(dev, "DVSEC Range%d denied by platform\n", i);
514
continue;
515
}
516
dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i);
517
put_device(cxld_dev);
518
allowed++;
519
}
520
521
if (!allowed) {
522
dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n");
523
return -ENXIO;
524
}
525
526
return 0;
527
}
528
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, "CXL");
529
530
/* Bit fields of the Table Access DOE request/response header dword */
#define CXL_DOE_TABLE_ACCESS_REQ_CODE		0x000000ff
#define   CXL_DOE_TABLE_ACCESS_REQ_CODE_READ	0
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE		0x0000ff00
#define   CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA	0
#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE	0xffff0000
/* Entry handle value that ends the table read loop */
#define CXL_DOE_TABLE_ACCESS_LAST_ENTRY		0xffff
/* Protocol argument passed to pci_doe() / pci_find_doe_mailbox() */
#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2

/* Build the little-endian request dword that reads CDAT entry @entry_handle */
#define CDAT_DOE_REQ(entry_handle) cpu_to_le32				\
	(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,			\
		    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE,			\
		    CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
544
545
static int cxl_cdat_get_length(struct device *dev,
546
struct pci_doe_mb *doe_mb,
547
size_t *length)
548
{
549
__le32 request = CDAT_DOE_REQ(0);
550
__le32 response[2];
551
int rc;
552
553
rc = pci_doe(doe_mb, PCI_VENDOR_ID_CXL,
554
CXL_DOE_PROTOCOL_TABLE_ACCESS,
555
&request, sizeof(request),
556
&response, sizeof(response));
557
if (rc < 0) {
558
dev_err(dev, "DOE failed: %d", rc);
559
return rc;
560
}
561
if (rc < sizeof(response))
562
return -EIO;
563
564
*length = le32_to_cpu(response[1]);
565
dev_dbg(dev, "CDAT length %zu\n", *length);
566
567
return 0;
568
}
569
570
/*
 * Read the CDAT header and all entries, one DOE exchange per entry, into the
 * buffer starting at @rsp.
 *
 * @dev: device used for log messages
 * @doe_mb: DOE mailbox to issue the requests on
 * @rsp: buffer with room for one response header plus *@length table bytes
 * @length: in: table length to read at most; out: bytes actually received
 *
 * Each response is written so that its DOE header lands on the last dword of
 * the previously received data; that dword is saved beforehand and restored
 * afterwards, which leaves the table contiguous in the buffer.
 *
 * Returns 0 on success, the negative error from pci_doe(), or -EIO when a
 * response has an unexpected size.
 */
static int cxl_cdat_read_table(struct device *dev,
			       struct pci_doe_mb *doe_mb,
			       struct cdat_doe_rsp *rsp, size_t *length)
{
	size_t received, remaining = *length;
	unsigned int entry_handle = 0;
	union cdat_data *data;
	__le32 saved_dw = 0;

	do {
		__le32 request = CDAT_DOE_REQ(entry_handle);
		int rc;

		rc = pci_doe(doe_mb, PCI_VENDOR_ID_CXL,
			     CXL_DOE_PROTOCOL_TABLE_ACCESS,
			     &request, sizeof(request),
			     rsp, sizeof(*rsp) + remaining);
		if (rc < 0) {
			/* Terminate the message like every other log line in this file */
			dev_err(dev, "DOE failed: %d\n", rc);
			return rc;
		}

		if (rc < sizeof(*rsp))
			return -EIO;

		data = (union cdat_data *)rsp->data;
		received = rc - sizeof(*rsp);

		/* Handle 0 must return exactly the header, others one full entry */
		if (entry_handle == 0) {
			if (received != sizeof(data->header))
				return -EIO;
		} else {
			if (received < sizeof(data->entry) ||
			    received != le16_to_cpu(data->entry.length))
				return -EIO;
		}

		/* Get the CXL table access header entry handle */
		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
					 le32_to_cpu(rsp->doe_header));

		/*
		 * Table Access Response Header overwrote the last DW of
		 * previous entry, so restore that DW
		 */
		rsp->doe_header = saved_dw;
		remaining -= received;
		rsp = (void *)rsp + received;
		saved_dw = rsp->doe_header;
	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);

	/* Length in CDAT header may exceed concatenation of CDAT entries */
	*length -= remaining;

	return 0;
}
626
627
/*
 * Sum all @size bytes of @buf modulo 256.
 *
 * Returns the 8-bit sum; 0 for an empty buffer.
 */
static unsigned char cdat_checksum(void *buf, size_t size)
{
	const unsigned char *cur = buf;
	const unsigned char *end = cur + size;
	unsigned char total = 0;

	while (cur < end)
		total += *cur++;

	return total;
}
636
637
/**
638
* read_cdat_data - Read the CDAT data on this port
639
* @port: Port to read data from
640
*
641
* This call will sleep waiting for responses from the DOE mailbox.
642
*/
643
void read_cdat_data(struct cxl_port *port)
644
{
645
struct device *uport = port->uport_dev;
646
struct device *dev = &port->dev;
647
struct pci_doe_mb *doe_mb;
648
struct pci_dev *pdev = NULL;
649
struct cxl_memdev *cxlmd;
650
struct cdat_doe_rsp *buf;
651
size_t table_length, length;
652
int rc;
653
654
if (is_cxl_memdev(uport)) {
655
struct device *host;
656
657
cxlmd = to_cxl_memdev(uport);
658
host = cxlmd->dev.parent;
659
if (dev_is_pci(host))
660
pdev = to_pci_dev(host);
661
} else if (dev_is_pci(uport)) {
662
pdev = to_pci_dev(uport);
663
}
664
665
if (!pdev)
666
return;
667
668
doe_mb = pci_find_doe_mailbox(pdev, PCI_VENDOR_ID_CXL,
669
CXL_DOE_PROTOCOL_TABLE_ACCESS);
670
if (!doe_mb) {
671
dev_dbg(dev, "No CDAT mailbox\n");
672
return;
673
}
674
675
port->cdat_available = true;
676
677
if (cxl_cdat_get_length(dev, doe_mb, &length)) {
678
dev_dbg(dev, "No CDAT length\n");
679
return;
680
}
681
682
/*
683
* The begin of the CDAT buffer needs space for additional 4
684
* bytes for the DOE header. Table data starts afterwards.
685
*/
686
buf = devm_kzalloc(dev, sizeof(*buf) + length, GFP_KERNEL);
687
if (!buf)
688
goto err;
689
690
table_length = length;
691
692
rc = cxl_cdat_read_table(dev, doe_mb, buf, &length);
693
if (rc)
694
goto err;
695
696
if (table_length != length)
697
dev_warn(dev, "Malformed CDAT table length (%zu:%zu), discarding trailing data\n",
698
table_length, length);
699
700
if (cdat_checksum(buf->data, length))
701
goto err;
702
703
port->cdat.table = buf->data;
704
port->cdat.length = length;
705
706
return;
707
err:
708
/* Don't leave table data allocated on error */
709
devm_kfree(dev, buf);
710
dev_err(dev, "Failed to read/validate CDAT.\n");
711
}
712
EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
713
714
static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
715
void __iomem *ras_base)
716
{
717
void __iomem *addr;
718
u32 status;
719
720
if (!ras_base)
721
return;
722
723
addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
724
status = readl(addr);
725
if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
726
writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
727
trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
728
}
729
}
730
731
static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
732
{
733
return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
734
}
735
736
/* CXL spec rev3.0 8.2.4.16.1 */
737
static void header_log_copy(void __iomem *ras_base, u32 *log)
738
{
739
void __iomem *addr;
740
u32 *log_addr;
741
int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
742
743
addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET;
744
log_addr = log;
745
746
for (i = 0; i < log_u32_size; i++) {
747
*log_addr = readl(addr);
748
log_addr++;
749
addr += sizeof(u32);
750
}
751
}
752
753
/*
754
* Log the state of the RAS status registers and prepare them to log the
755
* next error status. Return 1 if reset needed.
756
*/
757
static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
758
void __iomem *ras_base)
759
{
760
u32 hl[CXL_HEADERLOG_SIZE_U32];
761
void __iomem *addr;
762
u32 status;
763
u32 fe;
764
765
if (!ras_base)
766
return false;
767
768
addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
769
status = readl(addr);
770
if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
771
return false;
772
773
/* If multiple errors, log header points to first error from ctrl reg */
774
if (hweight32(status) > 1) {
775
void __iomem *rcc_addr =
776
ras_base + CXL_RAS_CAP_CONTROL_OFFSET;
777
778
fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
779
readl(rcc_addr)));
780
} else {
781
fe = status;
782
}
783
784
header_log_copy(ras_base, hl);
785
trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl);
786
writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
787
788
return true;
789
}
790
791
static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
792
{
793
return __cxl_handle_ras(cxlds, cxlds->regs.ras);
794
}
795
796
#ifdef CONFIG_PCIEAER_CXL
797
798
static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
799
{
800
resource_size_t aer_phys;
801
struct device *host;
802
u16 aer_cap;
803
804
aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
805
if (aer_cap) {
806
host = dport->reg_map.host;
807
aer_phys = aer_cap + dport->rcrb.base;
808
dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
809
sizeof(struct aer_capability_regs));
810
}
811
}
812
813
static void cxl_dport_map_ras(struct cxl_dport *dport)
814
{
815
struct cxl_register_map *map = &dport->reg_map;
816
struct device *dev = dport->dport_dev;
817
818
if (!map->component_map.ras.valid)
819
dev_dbg(dev, "RAS registers not found\n");
820
else if (cxl_map_component_regs(map, &dport->regs.component,
821
BIT(CXL_CM_CAP_CAP_ID_RAS)))
822
dev_dbg(dev, "Failed to map RAS capability.\n");
823
}
824
825
static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
826
{
827
void __iomem *aer_base = dport->regs.dport_aer;
828
u32 aer_cmd_mask, aer_cmd;
829
830
if (!aer_base)
831
return;
832
833
/*
834
* Disable RCH root port command interrupts.
835
* CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
836
*
837
* This sequence may not be necessary. CXL spec states disabling
838
* the root cmd register's interrupts is required. But, PCI spec
839
* shows these are disabled by default on reset.
840
*/
841
aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
842
PCI_ERR_ROOT_CMD_NONFATAL_EN |
843
PCI_ERR_ROOT_CMD_FATAL_EN);
844
aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
845
aer_cmd &= ~aer_cmd_mask;
846
writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
847
}
848
849
/**
850
* cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
851
* @dport: the cxl_dport that needs to be initialized
852
* @host: host device for devm operations
853
*/
854
void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
855
{
856
dport->reg_map.host = host;
857
cxl_dport_map_ras(dport);
858
859
if (dport->rch) {
860
struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
861
862
if (!host_bridge->native_aer)
863
return;
864
865
cxl_dport_map_rch_aer(dport);
866
cxl_disable_rch_root_ints(dport);
867
}
868
}
869
EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
870
871
static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
872
struct cxl_dport *dport)
873
{
874
return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
875
}
876
877
static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
878
struct cxl_dport *dport)
879
{
880
return __cxl_handle_ras(cxlds, dport->regs.ras);
881
}
882
883
/*
884
* Copy the AER capability registers using 32 bit read accesses.
885
* This is necessary because RCRB AER capability is MMIO mapped. Clear the
886
* status after copying.
887
*
888
* @aer_base: base address of AER capability block in RCRB
889
* @aer_regs: destination for copying AER capability
890
*/
891
static bool cxl_rch_get_aer_info(void __iomem *aer_base,
892
struct aer_capability_regs *aer_regs)
893
{
894
int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
895
u32 *aer_regs_buf = (u32 *)aer_regs;
896
int n;
897
898
if (!aer_base)
899
return false;
900
901
/* Use readl() to guarantee 32-bit accesses */
902
for (n = 0; n < read_cnt; n++)
903
aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
904
905
writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
906
writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
907
908
return true;
909
}
910
911
/* Get AER severity. Return false if there is no error. */
912
static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
913
int *severity)
914
{
915
if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
916
if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
917
*severity = AER_FATAL;
918
else
919
*severity = AER_NONFATAL;
920
return true;
921
}
922
923
if (aer_regs->cor_status & ~aer_regs->cor_mask) {
924
*severity = AER_CORRECTABLE;
925
return true;
926
}
927
928
return false;
929
}
930
931
static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
932
{
933
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
934
struct aer_capability_regs aer_regs;
935
struct cxl_dport *dport;
936
int severity;
937
938
struct cxl_port *port __free(put_cxl_port) =
939
cxl_pci_find_port(pdev, &dport);
940
if (!port)
941
return;
942
943
if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
944
return;
945
946
if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
947
return;
948
949
pci_print_aer(pdev, severity, &aer_regs);
950
951
if (severity == AER_CORRECTABLE)
952
cxl_handle_rdport_cor_ras(cxlds, dport);
953
else
954
cxl_handle_rdport_ras(cxlds, dport);
955
}
956
957
#else
958
/* No RCH downstream port error handling without CONFIG_PCIEAER_CXL */
static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
{
}
959
#endif
960
961
void cxl_cor_error_detected(struct pci_dev *pdev)
962
{
963
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
964
struct device *dev = &cxlds->cxlmd->dev;
965
966
scoped_guard(device, dev) {
967
if (!dev->driver) {
968
dev_warn(&pdev->dev,
969
"%s: memdev disabled, abort error handling\n",
970
dev_name(dev));
971
return;
972
}
973
974
if (cxlds->rcd)
975
cxl_handle_rdport_errors(cxlds);
976
977
cxl_handle_endpoint_cor_ras(cxlds);
978
}
979
}
980
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");
981
982
pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
983
pci_channel_state_t state)
984
{
985
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
986
struct cxl_memdev *cxlmd = cxlds->cxlmd;
987
struct device *dev = &cxlmd->dev;
988
bool ue;
989
990
scoped_guard(device, dev) {
991
if (!dev->driver) {
992
dev_warn(&pdev->dev,
993
"%s: memdev disabled, abort error handling\n",
994
dev_name(dev));
995
return PCI_ERS_RESULT_DISCONNECT;
996
}
997
998
if (cxlds->rcd)
999
cxl_handle_rdport_errors(cxlds);
1000
/*
1001
* A frozen channel indicates an impending reset which is fatal to
1002
* CXL.mem operation, and will likely crash the system. On the off
1003
* chance the situation is recoverable dump the status of the RAS
1004
* capability registers and bounce the active state of the memdev.
1005
*/
1006
ue = cxl_handle_endpoint_ras(cxlds);
1007
}
1008
1009
1010
switch (state) {
1011
case pci_channel_io_normal:
1012
if (ue) {
1013
device_release_driver(dev);
1014
return PCI_ERS_RESULT_NEED_RESET;
1015
}
1016
return PCI_ERS_RESULT_CAN_RECOVER;
1017
case pci_channel_io_frozen:
1018
dev_warn(&pdev->dev,
1019
"%s: frozen state error detected, disable CXL.mem\n",
1020
dev_name(dev));
1021
device_release_driver(dev);
1022
return PCI_ERS_RESULT_NEED_RESET;
1023
case pci_channel_io_perm_failure:
1024
dev_warn(&pdev->dev,
1025
"failure state error detected, request disconnect\n");
1026
return PCI_ERS_RESULT_DISCONNECT;
1027
}
1028
return PCI_ERS_RESULT_NEED_RESET;
1029
}
1030
EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL");
1031
1032
/* Flit size in bytes for the link of @pdev: 256 in 256B flit mode, else 68 */
static int cxl_flit_size(struct pci_dev *pdev)
{
	return cxl_pci_flit_256(pdev) ? 256 : 68;
}
1039
1040
/**
1041
* cxl_pci_get_latency - calculate the link latency for the PCIe link
1042
* @pdev: PCI device
1043
*
1044
* return: calculated latency or 0 for no latency
1045
*
1046
* CXL Memory Device SW Guide v1.0 2.11.4 Link latency calculation
1047
* Link latency = LinkPropagationLatency + FlitLatency + RetimerLatency
1048
* LinkProgationLatency is negligible, so 0 will be used
1049
* RetimerLatency is assumed to be negligible and 0 will be used
1050
* FlitLatency = FlitSize / LinkBandwidth
1051
* FlitSize is defined by spec. CXL rev3.0 4.2.1.
1052
* 68B flit is used up to 32GT/s. >32GT/s, 256B flit size is used.
1053
* The FlitLatency is converted to picoseconds.
1054
*/
1055
long cxl_pci_get_latency(struct pci_dev *pdev)
1056
{
1057
long bw;
1058
1059
bw = pcie_link_speed_mbps(pdev);
1060
if (bw < 0)
1061
return 0;
1062
bw /= BITS_PER_BYTE;
1063
1064
return cxl_flit_size(pdev) * MEGA / bw;
1065
}
1066
1067
static int __cxl_endpoint_decoder_reset_detected(struct device *dev, void *data)
1068
{
1069
struct cxl_port *port = data;
1070
struct cxl_decoder *cxld;
1071
struct cxl_hdm *cxlhdm;
1072
void __iomem *hdm;
1073
u32 ctrl;
1074
1075
if (!is_endpoint_decoder(dev))
1076
return 0;
1077
1078
cxld = to_cxl_decoder(dev);
1079
if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
1080
return 0;
1081
1082
cxlhdm = dev_get_drvdata(&port->dev);
1083
hdm = cxlhdm->regs.hdm_decoder;
1084
ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id));
1085
1086
return !FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl);
1087
}
1088
1089
bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
1090
{
1091
return device_for_each_child(&port->dev, port,
1092
__cxl_endpoint_decoder_reset_detected);
1093
}
1094
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, "CXL");
1095
1096
int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)
1097
{
1098
int speed, bw;
1099
u16 lnksta;
1100
u32 width;
1101
1102
speed = pcie_link_speed_mbps(pdev);
1103
if (speed < 0)
1104
return speed;
1105
speed /= BITS_PER_BYTE;
1106
1107
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
1108
width = FIELD_GET(PCI_EXP_LNKSTA_NLW, lnksta);
1109
bw = speed * width;
1110
1111
for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
1112
c[i].read_bandwidth = bw;
1113
c[i].write_bandwidth = bw;
1114
}
1115
1116
return 0;
1117
}
1118
1119
/*
 * Set max timeout such that platforms will optimize GPF flow to avoid
 * the implied worst-case scenario delays. On a sane platform, all
 * devices should always complete GPF within the energy budget of
 * the GPF flow. The kernel does not have enough information to pick
 * anything better than "maximize timeouts and hope it works".
 *
 * A misbehaving device could block forward progress of GPF for all
 * the other devices, exhausting the energy budget of the platform.
 * However, the spec seems to assume that moving on from slow to respond
 * devices is a virtue. It is not possible to know that, in actuality,
 * the slow to respond device is *the* most critical device in the
 * system to wait.
 */
#define GPF_TIMEOUT_BASE_MAX 2
#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */
1135
1136
u16 cxl_gpf_get_dvsec(struct device *dev)
1137
{
1138
struct pci_dev *pdev;
1139
bool is_port = true;
1140
u16 dvsec;
1141
1142
if (!dev_is_pci(dev))
1143
return 0;
1144
1145
pdev = to_pci_dev(dev);
1146
if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT)
1147
is_port = false;
1148
1149
dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
1150
is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF);
1151
if (!dvsec)
1152
dev_warn(dev, "%s GPF DVSEC not present\n",
1153
is_port ? "Port" : "Device");
1154
return dvsec;
1155
}
1156
EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL");
1157
1158
static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase)
1159
{
1160
u64 base, scale;
1161
int rc, offset;
1162
u16 ctrl;
1163
1164
switch (phase) {
1165
case 1:
1166
offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET;
1167
base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK;
1168
scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK;
1169
break;
1170
case 2:
1171
offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET;
1172
base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK;
1173
scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK;
1174
break;
1175
default:
1176
return -EINVAL;
1177
}
1178
1179
rc = pci_read_config_word(pdev, dvsec + offset, &ctrl);
1180
if (rc)
1181
return rc;
1182
1183
if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX &&
1184
FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX)
1185
return 0;
1186
1187
ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX);
1188
ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX);
1189
1190
rc = pci_write_config_word(pdev, dvsec + offset, ctrl);
1191
if (!rc)
1192
pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n",
1193
phase, GPF_TIMEOUT_BASE_MAX);
1194
1195
return rc;
1196
}
1197
1198
int cxl_gpf_port_setup(struct cxl_dport *dport)
1199
{
1200
if (!dport)
1201
return -EINVAL;
1202
1203
if (!dport->gpf_dvsec) {
1204
struct pci_dev *pdev;
1205
int dvsec;
1206
1207
dvsec = cxl_gpf_get_dvsec(dport->dport_dev);
1208
if (!dvsec)
1209
return -EINVAL;
1210
1211
dport->gpf_dvsec = dvsec;
1212
pdev = to_pci_dev(dport->dport_dev);
1213
update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 1);
1214
update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 2);
1215
}
1216
1217
return 0;
1218
}
1219
1220
static int count_dports(struct pci_dev *pdev, void *data)
1221
{
1222
struct cxl_walk_context *ctx = data;
1223
int type = pci_pcie_type(pdev);
1224
1225
if (pdev->bus != ctx->bus)
1226
return 0;
1227
if (!pci_is_pcie(pdev))
1228
return 0;
1229
if (type != ctx->type)
1230
return 0;
1231
1232
ctx->count++;
1233
return 0;
1234
}
1235
1236
int cxl_port_get_possible_dports(struct cxl_port *port)
1237
{
1238
struct pci_bus *bus = cxl_port_to_pci_bus(port);
1239
struct cxl_walk_context ctx;
1240
int type;
1241
1242
if (!bus) {
1243
dev_err(&port->dev, "No PCI bus found for port %s\n",
1244
dev_name(&port->dev));
1245
return -ENXIO;
1246
}
1247
1248
if (pci_is_root_bus(bus))
1249
type = PCI_EXP_TYPE_ROOT_PORT;
1250
else
1251
type = PCI_EXP_TYPE_DOWNSTREAM;
1252
1253
ctx = (struct cxl_walk_context) {
1254
.bus = bus,
1255
.type = type,
1256
};
1257
pci_walk_bus(bus, count_dports, &ctx);
1258
1259
return ctx.count;
1260
}
1261
1262