Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/s390/pci/pci_event.c
29521 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* Copyright IBM Corp. 2012
4
*
5
* Author(s):
6
* Jan Glauber <[email protected]>
7
*/
8
9
#define KMSG_COMPONENT "zpci"
10
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
11
12
#include <linux/kernel.h>
13
#include <linux/pci.h>
14
#include <asm/pci_debug.h>
15
#include <asm/pci_dma.h>
16
#include <asm/sclp.h>
17
18
#include "pci_bus.h"
19
#include "pci_report.h"
20
21
/* Content Code Description for PCI Function Error */
22
struct zpci_ccdf_err {
23
u32 reserved1;
24
u32 fh; /* function handle */
25
u32 fid; /* function id */
26
u32 ett : 4; /* expected table type */
27
u32 mvn : 12; /* MSI vector number */
28
u32 dmaas : 8; /* DMA address space */
29
u32 : 6;
30
u32 q : 1; /* event qualifier */
31
u32 rw : 1; /* read/write */
32
u64 faddr; /* failing address */
33
u32 reserved3;
34
u16 reserved4;
35
u16 pec; /* PCI event code */
36
} __packed;
37
38
/* Content Code Description for PCI Function Availability */
39
struct zpci_ccdf_avail {
40
u32 reserved1;
41
u32 fh; /* function handle */
42
u32 fid; /* function id */
43
u32 reserved2;
44
u32 reserved3;
45
u32 reserved4;
46
u32 reserved5;
47
u16 reserved6;
48
u16 pec; /* PCI event code */
49
} __packed;
50
51
/*
 * ers_result_indicates_abort - Does a recovery result end the attempt?
 * @ers_res: result to classify
 *
 * CAN_RECOVER, RECOVERED, NEED_RESET and NONE let recovery continue; every
 * other value is treated as an abort.
 *
 * Return: true if recovery must be aborted, false otherwise.
 */
static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
{
	bool may_continue;

	may_continue = ers_res == PCI_ERS_RESULT_CAN_RECOVER ||
		       ers_res == PCI_ERS_RESULT_RECOVERED ||
		       ers_res == PCI_ERS_RESULT_NEED_RESET ||
		       ers_res == PCI_ERS_RESULT_NONE;

	return !may_continue;
}
63
64
static bool is_passed_through(struct pci_dev *pdev)
65
{
66
struct zpci_dev *zdev = to_zpci(pdev);
67
bool ret;
68
69
mutex_lock(&zdev->kzdev_lock);
70
ret = !!zdev->kzdev;
71
mutex_unlock(&zdev->kzdev_lock);
72
73
return ret;
74
}
75
76
static bool is_driver_supported(struct pci_driver *driver)
77
{
78
if (!driver || !driver->err_handler)
79
return false;
80
if (!driver->err_handler->error_detected)
81
return false;
82
return true;
83
}
84
85
static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
86
struct pci_driver *driver)
87
{
88
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
89
90
ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
91
pci_uevent_ers(pdev, ers_res);
92
if (ers_result_indicates_abort(ers_res))
93
pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
94
else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
95
pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
96
97
return ers_res;
98
}
99
100
static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
101
struct pci_driver *driver)
102
{
103
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
104
struct zpci_dev *zdev = to_zpci(pdev);
105
int rc;
106
107
/* The underlying device may have been disabled by the event */
108
if (!zdev_enabled(zdev))
109
return PCI_ERS_RESULT_NEED_RESET;
110
111
pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
112
rc = zpci_reset_load_store_blocked(zdev);
113
if (rc) {
114
pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
115
/* Let's try a full reset instead */
116
return PCI_ERS_RESULT_NEED_RESET;
117
}
118
119
if (driver->err_handler->mmio_enabled)
120
ers_res = driver->err_handler->mmio_enabled(pdev);
121
else
122
ers_res = PCI_ERS_RESULT_NONE;
123
124
if (ers_result_indicates_abort(ers_res)) {
125
pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
126
pci_name(pdev));
127
return ers_res;
128
} else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
129
pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
130
return ers_res;
131
}
132
133
pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
134
rc = zpci_clear_error_state(zdev);
135
if (!rc) {
136
pdev->error_state = pci_channel_io_normal;
137
} else {
138
pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
139
/* Let's try a full reset instead */
140
return PCI_ERS_RESULT_NEED_RESET;
141
}
142
143
return ers_res;
144
}
145
146
static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
147
struct pci_driver *driver)
148
{
149
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
150
151
pr_info("%s: Initiating reset\n", pci_name(pdev));
152
if (zpci_hot_reset_device(to_zpci(pdev))) {
153
pr_err("%s: The reset request failed\n", pci_name(pdev));
154
return ers_res;
155
}
156
pdev->error_state = pci_channel_io_normal;
157
158
if (driver->err_handler->slot_reset)
159
ers_res = driver->err_handler->slot_reset(pdev);
160
else
161
ers_res = PCI_ERS_RESULT_NONE;
162
163
if (ers_result_indicates_abort(ers_res)) {
164
pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
165
return ers_res;
166
}
167
168
return ers_res;
169
}
170
171
/* zpci_event_attempt_error_recovery - Try to recover the given PCI function
172
* @pdev: PCI function to recover currently in the error state
173
*
174
* We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
175
* With the simplification that recovery always happens per function
176
* and the platform determines which functions are affected for
177
* multi-function devices.
178
*/
179
static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
180
{
181
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
182
struct zpci_dev *zdev = to_zpci(pdev);
183
char *status_str = "success";
184
struct pci_driver *driver;
185
186
/*
187
* Ensure that the PCI function is not removed concurrently, no driver
188
* is unbound or probed and that userspace can't access its
189
* configuration space while we perform recovery.
190
*/
191
pci_dev_lock(pdev);
192
if (pdev->error_state == pci_channel_io_perm_failure) {
193
ers_res = PCI_ERS_RESULT_DISCONNECT;
194
goto out_unlock;
195
}
196
pdev->error_state = pci_channel_io_frozen;
197
198
if (is_passed_through(pdev)) {
199
pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
200
pci_name(pdev));
201
status_str = "failed (pass-through)";
202
goto out_unlock;
203
}
204
205
driver = to_pci_driver(pdev->dev.driver);
206
if (!is_driver_supported(driver)) {
207
if (!driver) {
208
pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
209
pci_name(pdev));
210
status_str = "failed (no driver)";
211
} else {
212
pr_info("%s: The %s driver bound to the device does not support error recovery\n",
213
pci_name(pdev),
214
driver->name);
215
status_str = "failed (no driver support)";
216
}
217
goto out_unlock;
218
}
219
220
ers_res = zpci_event_notify_error_detected(pdev, driver);
221
if (ers_result_indicates_abort(ers_res)) {
222
status_str = "failed (abort on detection)";
223
goto out_unlock;
224
}
225
226
if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
227
ers_res = zpci_event_do_error_state_clear(pdev, driver);
228
if (ers_result_indicates_abort(ers_res)) {
229
status_str = "failed (abort on MMIO enable)";
230
goto out_unlock;
231
}
232
}
233
234
if (ers_res == PCI_ERS_RESULT_NEED_RESET)
235
ers_res = zpci_event_do_reset(pdev, driver);
236
237
/*
238
* ers_res can be PCI_ERS_RESULT_NONE either because the driver
239
* decided to return it, indicating that it abstains from voting
240
* on how to recover, or because it didn't implement the callback.
241
* Both cases assume, that if there is nothing else causing a
242
* disconnect, we recovered successfully.
243
*/
244
if (ers_res == PCI_ERS_RESULT_NONE)
245
ers_res = PCI_ERS_RESULT_RECOVERED;
246
247
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
248
pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
249
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
250
pci_name(pdev));
251
status_str = "failed (driver can't recover)";
252
goto out_unlock;
253
}
254
255
pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
256
if (driver->err_handler->resume)
257
driver->err_handler->resume(pdev);
258
pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED);
259
out_unlock:
260
pci_dev_unlock(pdev);
261
zpci_report_status(zdev, "recovery", status_str);
262
263
return ers_res;
264
}
265
266
/* zpci_event_io_failure - Report PCI channel failure state to driver
267
* @pdev: PCI function for which to report
268
* @es: PCI channel failure state to report
269
*/
270
static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
271
{
272
struct pci_driver *driver;
273
274
pci_dev_lock(pdev);
275
pdev->error_state = es;
276
/**
277
* While vfio-pci's error_detected callback notifies user-space QEMU
278
* reacts to this by freezing the guest. In an s390 environment PCI
279
* errors are rarely fatal so this is overkill. Instead in the future
280
* we will inject the error event and let the guest recover the device
281
* itself.
282
*/
283
if (is_passed_through(pdev))
284
goto out;
285
driver = to_pci_driver(pdev->dev.driver);
286
if (driver && driver->err_handler && driver->err_handler->error_detected)
287
driver->err_handler->error_detected(pdev, pdev->error_state);
288
out:
289
pci_dev_unlock(pdev);
290
}
291
292
static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
293
{
294
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
295
struct pci_dev *pdev = NULL;
296
pci_ers_result_t ers_res;
297
u32 fh = 0;
298
int rc;
299
300
zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
301
ccdf->fid, ccdf->fh, ccdf->pec);
302
zpci_err("error CCDF:\n");
303
zpci_err_hex(ccdf, sizeof(*ccdf));
304
305
if (zdev) {
306
mutex_lock(&zdev->state_lock);
307
rc = clp_refresh_fh(zdev->fid, &fh);
308
if (rc)
309
goto no_pdev;
310
if (!fh || ccdf->fh != fh) {
311
/* Ignore events with stale handles */
312
zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
313
ccdf->fid, fh, ccdf->fh);
314
goto no_pdev;
315
}
316
zpci_update_fh(zdev, ccdf->fh);
317
if (zdev->zbus->bus)
318
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
319
}
320
321
pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
322
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
323
324
if (!pdev)
325
goto no_pdev;
326
327
switch (ccdf->pec) {
328
case 0x002a: /* Error event concerns FMB */
329
case 0x002b:
330
case 0x002c:
331
break;
332
case 0x0040: /* Service Action or Error Recovery Failed */
333
case 0x003b:
334
zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
335
break;
336
default: /* PCI function left in the error state attempt to recover */
337
ers_res = zpci_event_attempt_error_recovery(pdev);
338
if (ers_res != PCI_ERS_RESULT_RECOVERED)
339
zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
340
break;
341
}
342
pci_dev_put(pdev);
343
no_pdev:
344
if (zdev)
345
mutex_unlock(&zdev->state_lock);
346
zpci_zdev_put(zdev);
347
}
348
349
/*
 * zpci_event_error - Entry point for PCI function error events
 * @data: event record, handed on as a struct zpci_ccdf_err
 *
 * Events are dropped when zpci_is_enabled() reports false.
 */
void zpci_event_error(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_error(data);
}
354
355
/*
 * zpci_event_hard_deconfigured - Handle a function that was taken away
 * @zdev: affected zPCI function
 * @fh: function handle reported with the event
 *
 * Records the new handle, removes the device from its bus, disables the
 * function if it is still enabled and moves it to the standby state.
 */
static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
{
	zpci_update_fh(zdev, fh);

	/* Passing true hints to the driver that the function is already unusable */
	zpci_bus_remove_device(zdev, true);

	/*
	 * The device itself is already gone, yet the zPCI resources still
	 * have to be freed, which happens as part of the disable.
	 */
	if (zdev_enabled(zdev))
		zpci_disable_device(zdev);

	zdev->state = ZPCI_FN_STATE_STANDBY;
}
369
370
static void zpci_event_reappear(struct zpci_dev *zdev)
371
{
372
lockdep_assert_held(&zdev->state_lock);
373
/*
374
* The zdev is in the reserved state. This means that it was presumed to
375
* go away but there are still undropped references. Now, the platform
376
* announced its availability again. Bring back the lingering zdev
377
* to standby. This is safe because we hold a temporary reference
378
* now so that it won't go away. Account for the re-appearance of the
379
* underlying device by incrementing the reference count.
380
*/
381
zdev->state = ZPCI_FN_STATE_STANDBY;
382
zpci_zdev_get(zdev);
383
zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh);
384
}
385
386
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
387
{
388
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
389
bool existing_zdev = !!zdev;
390
enum zpci_state state;
391
392
zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
393
ccdf->fid, ccdf->fh, ccdf->pec);
394
395
if (existing_zdev)
396
mutex_lock(&zdev->state_lock);
397
398
switch (ccdf->pec) {
399
case 0x0301: /* Reserved|Standby -> Configured */
400
if (!zdev) {
401
zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
402
if (IS_ERR(zdev))
403
break;
404
if (zpci_add_device(zdev)) {
405
kfree(zdev);
406
break;
407
}
408
} else {
409
if (zdev->state == ZPCI_FN_STATE_RESERVED)
410
zpci_event_reappear(zdev);
411
/* the configuration request may be stale */
412
else if (zdev->state != ZPCI_FN_STATE_STANDBY)
413
break;
414
zdev->state = ZPCI_FN_STATE_CONFIGURED;
415
}
416
zpci_scan_configured_device(zdev, ccdf->fh);
417
break;
418
case 0x0302: /* Reserved -> Standby */
419
if (!zdev) {
420
zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
421
if (IS_ERR(zdev))
422
break;
423
if (zpci_add_device(zdev)) {
424
kfree(zdev);
425
break;
426
}
427
} else {
428
if (zdev->state == ZPCI_FN_STATE_RESERVED)
429
zpci_event_reappear(zdev);
430
zpci_update_fh(zdev, ccdf->fh);
431
}
432
break;
433
case 0x0303: /* Deconfiguration requested */
434
if (zdev) {
435
/* The event may have been queued before we configured
436
* the device.
437
*/
438
if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
439
break;
440
zpci_update_fh(zdev, ccdf->fh);
441
zpci_deconfigure_device(zdev);
442
}
443
break;
444
case 0x0304: /* Configured -> Standby|Reserved */
445
if (zdev) {
446
/* The event may have been queued before we configured
447
* the device.:
448
*/
449
if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
450
zpci_event_hard_deconfigured(zdev, ccdf->fh);
451
/* The 0x0304 event may immediately reserve the device */
452
if (!clp_get_state(zdev->fid, &state) &&
453
state == ZPCI_FN_STATE_RESERVED) {
454
zpci_device_reserved(zdev);
455
}
456
}
457
break;
458
case 0x0306: /* 0x308 or 0x302 for multiple devices */
459
zpci_remove_reserved_devices();
460
zpci_scan_devices();
461
break;
462
case 0x0308: /* Standby -> Reserved */
463
if (!zdev)
464
break;
465
zpci_device_reserved(zdev);
466
break;
467
default:
468
break;
469
}
470
if (existing_zdev) {
471
mutex_unlock(&zdev->state_lock);
472
zpci_zdev_put(zdev);
473
}
474
}
475
476
/*
 * zpci_event_availability - Entry point for PCI function availability events
 * @data: event record, handed on as a struct zpci_ccdf_avail
 *
 * Events are dropped when zpci_is_enabled() reports false.
 */
void zpci_event_availability(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_availability(data);
}
481
482