// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <[email protected]>
 * Copyright (C) 2015 Wang Nan <[email protected]>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */
12
13
#ifndef _GNU_SOURCE
14
#define _GNU_SOURCE
15
#endif
16
#include <stdlib.h>
17
#include <stdio.h>
18
#include <stdarg.h>
19
#include <libgen.h>
20
#include <inttypes.h>
21
#include <limits.h>
22
#include <string.h>
23
#include <unistd.h>
24
#include <endian.h>
25
#include <fcntl.h>
26
#include <errno.h>
27
#include <ctype.h>
28
#include <asm/unistd.h>
29
#include <linux/err.h>
30
#include <linux/kernel.h>
31
#include <linux/bpf.h>
32
#include <linux/btf.h>
33
#include <linux/filter.h>
34
#include <linux/limits.h>
35
#include <linux/perf_event.h>
36
#include <linux/bpf_perf_event.h>
37
#include <linux/ring_buffer.h>
38
#include <sys/epoll.h>
39
#include <sys/ioctl.h>
40
#include <sys/mman.h>
41
#include <sys/stat.h>
42
#include <sys/types.h>
43
#include <sys/vfs.h>
44
#include <sys/utsname.h>
45
#include <sys/resource.h>
46
#include <libelf.h>
47
#include <gelf.h>
48
#include <zlib.h>
49
50
#include "libbpf.h"
51
#include "bpf.h"
52
#include "btf.h"
53
#include "libbpf_internal.h"
54
#include "hashmap.h"
55
#include "bpf_gen_internal.h"
56
#include "zip.h"
57
58
#ifndef BPF_FS_MAGIC
59
#define BPF_FS_MAGIC 0xcafe4a11
60
#endif
61
62
#define MAX_EVENT_NAME_LEN 64
63
64
#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
65
66
#define BPF_INSN_SZ (sizeof(struct bpf_insn))
67
68
/* vfprintf() in __base_pr() uses a nonliteral format string. It may break
 * compilation if the user enables the corresponding warning. Disable it explicitly.
 */
71
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
72
73
#define __printf(a, b) __attribute__((format(printf, a, b)))
74
75
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
76
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
77
static int map_set_def_max_entries(struct bpf_map *map);
78
79
static const char * const attach_type_name[] = {
80
[BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
81
[BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
82
[BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
83
[BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
84
[BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
85
[BPF_CGROUP_DEVICE] = "cgroup_device",
86
[BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
87
[BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
88
[BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
89
[BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
90
[BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
91
[BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
92
[BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
93
[BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
94
[BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
95
[BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
96
[BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
97
[BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
98
[BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
99
[BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
100
[BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
101
[BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
102
[BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
103
[BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
104
[BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
105
[BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
106
[BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
107
[BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
108
[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
109
[BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
110
[BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
111
[BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
112
[BPF_LIRC_MODE2] = "lirc_mode2",
113
[BPF_FLOW_DISSECTOR] = "flow_dissector",
114
[BPF_TRACE_RAW_TP] = "trace_raw_tp",
115
[BPF_TRACE_FENTRY] = "trace_fentry",
116
[BPF_TRACE_FEXIT] = "trace_fexit",
117
[BPF_MODIFY_RETURN] = "modify_return",
118
[BPF_LSM_MAC] = "lsm_mac",
119
[BPF_LSM_CGROUP] = "lsm_cgroup",
120
[BPF_SK_LOOKUP] = "sk_lookup",
121
[BPF_TRACE_ITER] = "trace_iter",
122
[BPF_XDP_DEVMAP] = "xdp_devmap",
123
[BPF_XDP_CPUMAP] = "xdp_cpumap",
124
[BPF_XDP] = "xdp",
125
[BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
126
[BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
127
[BPF_PERF_EVENT] = "perf_event",
128
[BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
129
[BPF_STRUCT_OPS] = "struct_ops",
130
[BPF_NETFILTER] = "netfilter",
131
[BPF_TCX_INGRESS] = "tcx_ingress",
132
[BPF_TCX_EGRESS] = "tcx_egress",
133
[BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi",
134
[BPF_NETKIT_PRIMARY] = "netkit_primary",
135
[BPF_NETKIT_PEER] = "netkit_peer",
136
[BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session",
137
[BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session",
138
};
139
140
static const char * const link_type_name[] = {
141
[BPF_LINK_TYPE_UNSPEC] = "unspec",
142
[BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
143
[BPF_LINK_TYPE_TRACING] = "tracing",
144
[BPF_LINK_TYPE_CGROUP] = "cgroup",
145
[BPF_LINK_TYPE_ITER] = "iter",
146
[BPF_LINK_TYPE_NETNS] = "netns",
147
[BPF_LINK_TYPE_XDP] = "xdp",
148
[BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
149
[BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
150
[BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
151
[BPF_LINK_TYPE_NETFILTER] = "netfilter",
152
[BPF_LINK_TYPE_TCX] = "tcx",
153
[BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi",
154
[BPF_LINK_TYPE_NETKIT] = "netkit",
155
[BPF_LINK_TYPE_SOCKMAP] = "sockmap",
156
};
157
158
static const char * const map_type_name[] = {
159
[BPF_MAP_TYPE_UNSPEC] = "unspec",
160
[BPF_MAP_TYPE_HASH] = "hash",
161
[BPF_MAP_TYPE_ARRAY] = "array",
162
[BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
163
[BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
164
[BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
165
[BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
166
[BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
167
[BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
168
[BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
169
[BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
170
[BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
171
[BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
172
[BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
173
[BPF_MAP_TYPE_DEVMAP] = "devmap",
174
[BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
175
[BPF_MAP_TYPE_SOCKMAP] = "sockmap",
176
[BPF_MAP_TYPE_CPUMAP] = "cpumap",
177
[BPF_MAP_TYPE_XSKMAP] = "xskmap",
178
[BPF_MAP_TYPE_SOCKHASH] = "sockhash",
179
[BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
180
[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
181
[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
182
[BPF_MAP_TYPE_QUEUE] = "queue",
183
[BPF_MAP_TYPE_STACK] = "stack",
184
[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
185
[BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
186
[BPF_MAP_TYPE_RINGBUF] = "ringbuf",
187
[BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
188
[BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
189
[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
190
[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
191
[BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
192
[BPF_MAP_TYPE_ARENA] = "arena",
193
};
194
195
static const char * const prog_type_name[] = {
196
[BPF_PROG_TYPE_UNSPEC] = "unspec",
197
[BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
198
[BPF_PROG_TYPE_KPROBE] = "kprobe",
199
[BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
200
[BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
201
[BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
202
[BPF_PROG_TYPE_XDP] = "xdp",
203
[BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
204
[BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
205
[BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
206
[BPF_PROG_TYPE_LWT_IN] = "lwt_in",
207
[BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
208
[BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
209
[BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
210
[BPF_PROG_TYPE_SK_SKB] = "sk_skb",
211
[BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
212
[BPF_PROG_TYPE_SK_MSG] = "sk_msg",
213
[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
214
[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
215
[BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
216
[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
217
[BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
218
[BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
219
[BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
220
[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
221
[BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
222
[BPF_PROG_TYPE_TRACING] = "tracing",
223
[BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
224
[BPF_PROG_TYPE_EXT] = "ext",
225
[BPF_PROG_TYPE_LSM] = "lsm",
226
[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
227
[BPF_PROG_TYPE_SYSCALL] = "syscall",
228
[BPF_PROG_TYPE_NETFILTER] = "netfilter",
229
};
230
231
static int __base_pr(enum libbpf_print_level level, const char *format,
232
va_list args)
233
{
234
const char *env_var = "LIBBPF_LOG_LEVEL";
235
static enum libbpf_print_level min_level = LIBBPF_INFO;
236
static bool initialized;
237
238
if (!initialized) {
239
char *verbosity;
240
241
initialized = true;
242
verbosity = getenv(env_var);
243
if (verbosity) {
244
if (strcasecmp(verbosity, "warn") == 0)
245
min_level = LIBBPF_WARN;
246
else if (strcasecmp(verbosity, "debug") == 0)
247
min_level = LIBBPF_DEBUG;
248
else if (strcasecmp(verbosity, "info") == 0)
249
min_level = LIBBPF_INFO;
250
else
251
fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n",
252
env_var, verbosity);
253
}
254
}
255
256
/* if too verbose, skip logging */
257
if (level > min_level)
258
return 0;
259
260
return vfprintf(stderr, format, args);
261
}
262
263
static libbpf_print_fn_t __libbpf_pr = __base_pr;
264
265
libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
266
{
267
libbpf_print_fn_t old_print_fn;
268
269
old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);
270
271
return old_print_fn;
272
}
273
274
__printf(2, 3)
275
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
276
{
277
va_list args;
278
int old_errno;
279
libbpf_print_fn_t print_fn;
280
281
print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
282
if (!print_fn)
283
return;
284
285
old_errno = errno;
286
287
va_start(args, format);
288
print_fn(level, format, args);
289
va_end(args);
290
291
errno = old_errno;
292
}
293
294
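/* If a BPF operation failed with -EPERM even though we are running as
 * root, the likely culprit on older kernels is the RLIMIT_MEMLOCK limit
 * that BPF memory is charged against, so hint at raising 'ulimit -l'.
 */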
static void pr_perm_msg(int err)
295
{
296
struct rlimit limit;
297
char buf[100];
298
299
if (err != -EPERM || geteuid() != 0)
300
return;
301
302
err = getrlimit(RLIMIT_MEMLOCK, &limit);
303
if (err)
304
return;
305
306
if (limit.rlim_cur == RLIM_INFINITY)
307
return;
308
309
if (limit.rlim_cur < 1024)
310
snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
311
else if (limit.rlim_cur < 1024*1024)
312
snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
313
else
314
snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
315
316
pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
317
buf);
318
}
319
320
/* Copied from tools/perf/util/util.h */
321
#ifndef zfree
322
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
323
#endif
324
325
#ifndef zclose
326
# define zclose(fd) ({ \
327
int ___err = 0; \
328
if ((fd) >= 0) \
329
___err = close((fd)); \
330
fd = -1; \
331
___err; })
332
#endif
333
334
static inline __u64 ptr_to_u64(const void *ptr)
335
{
336
return (__u64) (unsigned long) ptr;
337
}
338
339
int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
340
{
341
/* as of v1.0 libbpf_set_strict_mode() is a no-op */
342
return 0;
343
}
344
345
__u32 libbpf_major_version(void)
346
{
347
return LIBBPF_MAJOR_VERSION;
348
}
349
350
__u32 libbpf_minor_version(void)
351
{
352
return LIBBPF_MINOR_VERSION;
353
}
354
355
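/* The two-level _S()/__S() expansion ensures LIBBPF_MAJOR_VERSION and
 * LIBBPF_MINOR_VERSION are macro-expanded to their numeric values before
 * being stringified into the "vX.Y" literal.
 */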
const char *libbpf_version_string(void)
356
{
357
#define __S(X) #X
358
#define _S(X) __S(X)
359
return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
360
#undef _S
361
#undef __S
362
}
363
364
enum reloc_type {
365
RELO_LD64,
366
RELO_CALL,
367
RELO_DATA,
368
RELO_EXTERN_LD64,
369
RELO_EXTERN_CALL,
370
RELO_SUBPROG_ADDR,
371
RELO_CORE,
372
};
373
374
struct reloc_desc {
375
enum reloc_type type;
376
int insn_idx;
377
union {
378
const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
379
struct {
380
int map_idx;
381
int sym_off;
382
int ext_idx;
383
};
384
};
385
};
386
387
/* stored as sec_def->cookie for all libbpf-supported SEC()s */
388
enum sec_def_flags {
389
SEC_NONE = 0,
390
/* expected_attach_type is optional if the kernel doesn't support it */
391
SEC_EXP_ATTACH_OPT = 1,
392
/* legacy, only used by libbpf_get_type_names() and
 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
 * This used to be associated with cgroup (and a few other) BPF programs
 * that were attachable through the BPF_PROG_ATTACH command. Pretty
 * meaningless nowadays, though.
 */
398
SEC_ATTACHABLE = 2,
399
SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
400
/* attachment target is specified through BTF ID in either kernel or
401
* other BPF program's BTF object
402
*/
403
SEC_ATTACH_BTF = 4,
404
/* BPF program type allows sleeping/blocking in kernel */
405
SEC_SLEEPABLE = 8,
406
/* BPF program supports non-linear XDP buffers */
407
SEC_XDP_FRAGS = 16,
408
/* Set up the proper attach type for USDT probes. */
409
SEC_USDT = 32,
410
};
411
412
struct bpf_sec_def {
413
char *sec;
414
enum bpf_prog_type prog_type;
415
enum bpf_attach_type expected_attach_type;
416
long cookie;
417
int handler_id;
418
419
libbpf_prog_setup_fn_t prog_setup_fn;
420
libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
421
libbpf_prog_attach_fn_t prog_attach_fn;
422
};
423
424
/*
425
* bpf_prog should be a better name but it has been used in
426
* linux/filter.h.
427
*/
428
struct bpf_program {
429
char *name;
430
char *sec_name;
431
size_t sec_idx;
432
const struct bpf_sec_def *sec_def;
433
/* this program's instruction offset (in number of instructions)
434
* within its containing ELF section
435
*/
436
size_t sec_insn_off;
437
/* number of original instructions in ELF section belonging to this
 * program, not taking into account subprogram instructions possibly
 * appended later during relocation
 */
441
size_t sec_insn_cnt;
442
/* Offset (in number of instructions) of the start of instructions
 * belonging to this BPF program within its containing main BPF
 * program. For the entry-point (main) BPF program, this is always
 * zero. For a sub-program, this gets reset before each main BPF
 * program is processed and relocated, and is used to determine
 * whether the sub-program was already appended to the main program,
 * and if yes, at which instruction offset.
 */
450
size_t sub_insn_off;
451
452
/* instructions that belong to BPF program; insns[0] is located at
453
* sec_insn_off instruction within its ELF section in ELF file, so
454
* when mapping ELF file instruction index to the local instruction,
455
* one needs to subtract sec_insn_off; and vice versa.
456
*/
457
struct bpf_insn *insns;
458
/* actual number of instructions in this BPF program's image; for
 * entry-point BPF programs this includes the size of the main program
 * itself plus all the used sub-programs, appended at the end
 */
462
size_t insns_cnt;
463
464
struct reloc_desc *reloc_desc;
465
int nr_reloc;
466
467
/* BPF verifier log settings */
468
char *log_buf;
469
size_t log_size;
470
__u32 log_level;
471
472
struct bpf_object *obj;
473
474
int fd;
475
bool autoload;
476
bool autoattach;
477
bool sym_global;
478
bool mark_btf_static;
479
enum bpf_prog_type type;
480
enum bpf_attach_type expected_attach_type;
481
int exception_cb_idx;
482
483
int prog_ifindex;
484
__u32 attach_btf_obj_fd;
485
__u32 attach_btf_id;
486
__u32 attach_prog_fd;
487
488
void *func_info;
489
__u32 func_info_rec_size;
490
__u32 func_info_cnt;
491
492
void *line_info;
493
__u32 line_info_rec_size;
494
__u32 line_info_cnt;
495
__u32 prog_flags;
496
__u8 hash[SHA256_DIGEST_LENGTH];
497
};
498
499
struct bpf_struct_ops {
500
struct bpf_program **progs;
501
__u32 *kern_func_off;
502
/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
503
void *data;
504
/* e.g. struct bpf_struct_ops_tcp_congestion_ops in
505
* btf_vmlinux's format.
506
* struct bpf_struct_ops_tcp_congestion_ops {
507
* [... some other kernel fields ...]
508
* struct tcp_congestion_ops data;
509
* }
510
 * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
511
* bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
512
* from "data".
513
*/
514
void *kern_vdata;
515
__u32 type_id;
516
};
517
518
#define DATA_SEC ".data"
519
#define BSS_SEC ".bss"
520
#define RODATA_SEC ".rodata"
521
#define KCONFIG_SEC ".kconfig"
522
#define KSYMS_SEC ".ksyms"
523
#define STRUCT_OPS_SEC ".struct_ops"
524
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
525
#define ARENA_SEC ".addr_space.1"
526
527
enum libbpf_map_type {
528
LIBBPF_MAP_UNSPEC,
529
LIBBPF_MAP_DATA,
530
LIBBPF_MAP_BSS,
531
LIBBPF_MAP_RODATA,
532
LIBBPF_MAP_KCONFIG,
533
};
534
535
struct bpf_map_def {
536
unsigned int type;
537
unsigned int key_size;
538
unsigned int value_size;
539
unsigned int max_entries;
540
unsigned int map_flags;
541
};
542
543
struct bpf_map {
544
struct bpf_object *obj;
545
char *name;
546
/* real_name is defined for special internal maps (.rodata*,
547
* .data*, .bss, .kconfig) and preserves their original ELF section
548
* name. This is important to be able to find corresponding BTF
549
* DATASEC information.
550
*/
551
char *real_name;
552
int fd;
553
int sec_idx;
554
size_t sec_offset;
555
int map_ifindex;
556
int inner_map_fd;
557
struct bpf_map_def def;
558
__u32 numa_node;
559
__u32 btf_var_idx;
560
int mod_btf_fd;
561
__u32 btf_key_type_id;
562
__u32 btf_value_type_id;
563
__u32 btf_vmlinux_value_type_id;
564
enum libbpf_map_type libbpf_type;
565
void *mmaped;
566
struct bpf_struct_ops *st_ops;
567
struct bpf_map *inner_map;
568
void **init_slots;
569
int init_slots_sz;
570
char *pin_path;
571
bool pinned;
572
bool reused;
573
bool autocreate;
574
bool autoattach;
575
__u64 map_extra;
576
struct bpf_program *excl_prog;
577
};
578
579
enum extern_type {
580
EXT_UNKNOWN,
581
EXT_KCFG,
582
EXT_KSYM,
583
};
584
585
enum kcfg_type {
586
KCFG_UNKNOWN,
587
KCFG_CHAR,
588
KCFG_BOOL,
589
KCFG_INT,
590
KCFG_TRISTATE,
591
KCFG_CHAR_ARR,
592
};
593
594
struct extern_desc {
595
enum extern_type type;
596
int sym_idx;
597
int btf_id;
598
int sec_btf_id;
599
char *name;
600
char *essent_name;
601
bool is_set;
602
bool is_weak;
603
union {
604
struct {
605
enum kcfg_type type;
606
int sz;
607
int align;
608
int data_off;
609
bool is_signed;
610
} kcfg;
611
struct {
612
unsigned long long addr;
613
614
/* target btf_id of the corresponding kernel var. */
615
int kernel_btf_obj_fd;
616
int kernel_btf_id;
617
618
/* local btf_id of the ksym extern's type. */
619
__u32 type_id;
620
/* BTF fd index to be patched in for insn->off, this is
621
* 0 for vmlinux BTF, index in obj->fd_array for module
622
* BTF
623
*/
624
__s16 btf_fd_idx;
625
} ksym;
626
};
627
};
628
629
struct module_btf {
630
struct btf *btf;
631
char *name;
632
__u32 id;
633
int fd;
634
int fd_array_idx;
635
};
636
637
enum sec_type {
638
SEC_UNUSED = 0,
639
SEC_RELO,
640
SEC_BSS,
641
SEC_DATA,
642
SEC_RODATA,
643
SEC_ST_OPS,
644
};
645
646
struct elf_sec_desc {
647
enum sec_type sec_type;
648
Elf64_Shdr *shdr;
649
Elf_Data *data;
650
};
651
652
struct elf_state {
653
int fd;
654
const void *obj_buf;
655
size_t obj_buf_sz;
656
Elf *elf;
657
Elf64_Ehdr *ehdr;
658
Elf_Data *symbols;
659
Elf_Data *arena_data;
660
size_t shstrndx; /* section index for section name strings */
661
size_t strtabidx;
662
struct elf_sec_desc *secs;
663
size_t sec_cnt;
664
int btf_maps_shndx;
665
__u32 btf_maps_sec_btf_id;
666
int text_shndx;
667
int symbols_shndx;
668
bool has_st_ops;
669
int arena_data_shndx;
670
};
671
672
struct usdt_manager;
673
674
enum bpf_object_state {
675
OBJ_OPEN,
676
OBJ_PREPARED,
677
OBJ_LOADED,
678
};
679
680
struct bpf_object {
681
char name[BPF_OBJ_NAME_LEN];
682
char license[64];
683
__u32 kern_version;
684
685
enum bpf_object_state state;
686
struct bpf_program *programs;
687
size_t nr_programs;
688
struct bpf_map *maps;
689
size_t nr_maps;
690
size_t maps_cap;
691
692
char *kconfig;
693
struct extern_desc *externs;
694
int nr_extern;
695
int kconfig_map_idx;
696
697
bool has_subcalls;
698
bool has_rodata;
699
700
struct bpf_gen *gen_loader;
701
702
/* Information when doing ELF related work. Only valid if efile.elf is not NULL */
703
struct elf_state efile;
704
705
unsigned char byteorder;
706
707
struct btf *btf;
708
struct btf_ext *btf_ext;
709
710
/* Parse and load BTF vmlinux if any of the programs in the object need
711
* it at load time.
712
*/
713
struct btf *btf_vmlinux;
714
/* Path to the custom BTF to be used for BPF CO-RE relocations as an
715
* override for vmlinux BTF.
716
*/
717
char *btf_custom_path;
718
/* vmlinux BTF override for CO-RE relocations */
719
struct btf *btf_vmlinux_override;
720
/* Lazily initialized kernel module BTFs */
721
struct module_btf *btf_modules;
722
bool btf_modules_loaded;
723
size_t btf_module_cnt;
724
size_t btf_module_cap;
725
726
/* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
727
char *log_buf;
728
size_t log_size;
729
__u32 log_level;
730
731
int *fd_array;
732
size_t fd_array_cap;
733
size_t fd_array_cnt;
734
735
struct usdt_manager *usdt_man;
736
737
int arena_map_idx;
738
void *arena_data;
739
size_t arena_data_sz;
740
741
struct kern_feature_cache *feat_cache;
742
char *token_path;
743
int token_fd;
744
745
char path[];
746
};
747
748
static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
749
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
750
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
751
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
752
static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
753
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
754
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
755
static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
756
static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
757
758
void bpf_program__unload(struct bpf_program *prog)
759
{
760
if (!prog)
761
return;
762
763
zclose(prog->fd);
764
765
zfree(&prog->func_info);
766
zfree(&prog->line_info);
767
}
768
769
static void bpf_program__exit(struct bpf_program *prog)
770
{
771
if (!prog)
772
return;
773
774
bpf_program__unload(prog);
775
zfree(&prog->name);
776
zfree(&prog->sec_name);
777
zfree(&prog->insns);
778
zfree(&prog->reloc_desc);
779
780
prog->nr_reloc = 0;
781
prog->insns_cnt = 0;
782
prog->sec_idx = -1;
783
}
784
785
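/* A call to another BPF subprogram (bpf-to-bpf call) is encoded as a
 * BPF_JMP | BPF_CALL instruction with src_reg set to BPF_PSEUDO_CALL,
 * as opposed to a helper call, which uses src_reg == 0.
 */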
static bool insn_is_subprog_call(const struct bpf_insn *insn)
786
{
787
return BPF_CLASS(insn->code) == BPF_JMP &&
788
BPF_OP(insn->code) == BPF_CALL &&
789
BPF_SRC(insn->code) == BPF_K &&
790
insn->src_reg == BPF_PSEUDO_CALL &&
791
insn->dst_reg == 0 &&
792
insn->off == 0;
793
}
794
795
static bool is_call_insn(const struct bpf_insn *insn)
796
{
797
return insn->code == (BPF_JMP | BPF_CALL);
798
}
799
800
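/* An ldimm64 instruction with src_reg == BPF_PSEUDO_FUNC loads the
 * address of a BPF subprogram (e.g., a callback passed to a helper)
 * rather than a plain 64-bit immediate.
 */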
static bool insn_is_pseudo_func(struct bpf_insn *insn)
801
{
802
return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
803
}
804
805
static int
806
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
807
const char *name, size_t sec_idx, const char *sec_name,
808
size_t sec_off, void *insn_data, size_t insn_data_sz)
809
{
810
if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
811
pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
812
sec_name, name, sec_off, insn_data_sz);
813
return -EINVAL;
814
}
815
816
memset(prog, 0, sizeof(*prog));
817
prog->obj = obj;
818
819
prog->sec_idx = sec_idx;
820
prog->sec_insn_off = sec_off / BPF_INSN_SZ;
821
prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
822
/* insns_cnt can later be increased by appending used subprograms */
823
prog->insns_cnt = prog->sec_insn_cnt;
824
825
prog->type = BPF_PROG_TYPE_UNSPEC;
826
prog->fd = -1;
827
prog->exception_cb_idx = -1;
828
829
/* libbpf's convention for SEC("?abc...") is that it's just like
830
* SEC("abc...") but the corresponding bpf_program starts out with
831
* autoload set to false.
832
*/
833
if (sec_name[0] == '?') {
834
prog->autoload = false;
835
/* from now on forget there was ? in section name */
836
sec_name++;
837
} else {
838
prog->autoload = true;
839
}
840
841
prog->autoattach = true;
842
843
/* inherit object's log_level */
844
prog->log_level = obj->log_level;
845
846
prog->sec_name = strdup(sec_name);
847
if (!prog->sec_name)
848
goto errout;
849
850
prog->name = strdup(name);
851
if (!prog->name)
852
goto errout;
853
854
prog->insns = malloc(insn_data_sz);
855
if (!prog->insns)
856
goto errout;
857
memcpy(prog->insns, insn_data, insn_data_sz);
858
859
return 0;
860
errout:
861
pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
862
bpf_program__exit(prog);
863
return -ENOMEM;
864
}
865
866
static int
867
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
868
const char *sec_name, int sec_idx)
869
{
870
Elf_Data *symbols = obj->efile.symbols;
871
struct bpf_program *prog, *progs;
872
void *data = sec_data->d_buf;
873
size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
874
int nr_progs, err, i;
875
const char *name;
876
Elf64_Sym *sym;
877
878
progs = obj->programs;
879
nr_progs = obj->nr_programs;
880
nr_syms = symbols->d_size / sizeof(Elf64_Sym);
881
882
for (i = 0; i < nr_syms; i++) {
883
sym = elf_sym_by_idx(obj, i);
884
885
if (sym->st_shndx != sec_idx)
886
continue;
887
if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
888
continue;
889
890
prog_sz = sym->st_size;
891
sec_off = sym->st_value;
892
893
name = elf_sym_str(obj, sym->st_name);
894
if (!name) {
895
pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
896
sec_name, sec_off);
897
return -LIBBPF_ERRNO__FORMAT;
898
}
899
900
if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) {
901
pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
902
sec_name, sec_off);
903
return -LIBBPF_ERRNO__FORMAT;
904
}
905
906
if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
907
pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
908
return -ENOTSUP;
909
}
910
911
pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
912
sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
913
914
progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
915
if (!progs) {
916
/*
 * In this case the original obj->programs
 * is still valid, so no special treatment is needed in
 * bpf_close_object().
 */
921
pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
922
sec_name, name);
923
return -ENOMEM;
924
}
925
obj->programs = progs;
926
927
prog = &progs[nr_progs];
928
929
err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
930
sec_off, data + sec_off, prog_sz);
931
if (err)
932
return err;
933
934
if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
935
prog->sym_global = true;
936
937
/* if the function is a global/weak symbol but has restricted
 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
 * as static to enable a more permissive BPF verification mode
 * with more outside context available to the BPF verifier
 */
942
if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
943
|| ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
944
prog->mark_btf_static = true;
945
946
nr_progs++;
947
obj->nr_programs = nr_progs;
948
}
949
950
return 0;
951
}
952
953
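/* Byte-swap every instruction of every program into host byte order;
 * used when the ELF object's endianness differs from the host's.
 */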
static void bpf_object_bswap_progs(struct bpf_object *obj)
954
{
955
struct bpf_program *prog = obj->programs;
956
struct bpf_insn *insn;
957
int p, i;
958
959
for (p = 0; p < obj->nr_programs; p++, prog++) {
960
insn = prog->insns;
961
for (i = 0; i < prog->insns_cnt; i++, insn++)
962
bpf_insn_bswap(insn);
963
}
964
pr_debug("converted %zu BPF programs to native byte order\n", obj->nr_programs);
965
}
966
967
static const struct btf_member *
968
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
969
{
970
struct btf_member *m;
971
int i;
972
973
for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
974
if (btf_member_bit_offset(t, i) == bit_offset)
975
return m;
976
}
977
978
return NULL;
979
}
980
981
static const struct btf_member *
982
find_member_by_name(const struct btf *btf, const struct btf_type *t,
983
const char *name)
984
{
985
struct btf_member *m;
986
int i;
987
988
for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
989
if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
990
return m;
991
}
992
993
return NULL;
994
}
995
996
static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
997
__u16 kind, struct btf **res_btf,
998
struct module_btf **res_mod_btf);
999
1000
#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
1001
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
1002
const char *name, __u32 kind);
1003
1004
static int
1005
find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
1006
struct module_btf **mod_btf,
1007
const struct btf_type **type, __u32 *type_id,
1008
const struct btf_type **vtype, __u32 *vtype_id,
1009
const struct btf_member **data_member)
1010
{
1011
const struct btf_type *kern_type, *kern_vtype;
1012
const struct btf_member *kern_data_member;
1013
struct btf *btf = NULL;
1014
__s32 kern_vtype_id, kern_type_id;
1015
char tname[192], stname[256];
1016
__u32 i;
1017
1018
snprintf(tname, sizeof(tname), "%.*s",
1019
(int)bpf_core_essential_name_len(tname_raw), tname_raw);
1020
1021
snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname);
1022
1023
/* Look for the corresponding "map_value" type that will be used
1024
* in map_update(BPF_MAP_TYPE_STRUCT_OPS) first, figure out the btf
1025
* and the mod_btf.
1026
* For example, find "struct bpf_struct_ops_tcp_congestion_ops".
1027
*/
1028
kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf);
1029
if (kern_vtype_id < 0) {
1030
pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname);
1031
return kern_vtype_id;
1032
}
1033
kern_vtype = btf__type_by_id(btf, kern_vtype_id);
1034
1035
kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
1036
if (kern_type_id < 0) {
1037
pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname);
1038
return kern_type_id;
1039
}
1040
kern_type = btf__type_by_id(btf, kern_type_id);
1041
1042
/* Find "struct tcp_congestion_ops" from
1043
* struct bpf_struct_ops_tcp_congestion_ops {
1044
* [ ... ]
1045
* struct tcp_congestion_ops data;
1046
* }
1047
*/
1048
kern_data_member = btf_members(kern_vtype);
1049
for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
1050
if (kern_data_member->type == kern_type_id)
1051
break;
1052
}
1053
if (i == btf_vlen(kern_vtype)) {
1054
pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n",
1055
tname, stname);
1056
return -EINVAL;
1057
}
1058
1059
*type = kern_type;
1060
*type_id = kern_type_id;
1061
*vtype = kern_vtype;
1062
*vtype_id = kern_vtype_id;
1063
*data_member = kern_data_member;
1064
1065
return 0;
1066
}
1067
1068
static bool bpf_map__is_struct_ops(const struct bpf_map *map)
1069
{
1070
return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
1071
}
1072
1073
static bool is_valid_st_ops_program(struct bpf_object *obj,
1074
const struct bpf_program *prog)
1075
{
1076
int i;
1077
1078
for (i = 0; i < obj->nr_programs; i++) {
1079
if (&obj->programs[i] == prog)
1080
return prog->type == BPF_PROG_TYPE_STRUCT_OPS;
1081
}
1082
1083
return false;
1084
}
1085
1086
/* For each struct_ops program P, referenced from some struct_ops map M,
1087
* enable P.autoload if there are Ms for which M.autocreate is true,
1088
* disable P.autoload if for all Ms M.autocreate is false.
1089
* Don't change P.autoload for programs that are not referenced from any maps.
1090
*/
1091
static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
1092
{
1093
struct bpf_program *prog, *slot_prog;
1094
struct bpf_map *map;
1095
int i, j, k, vlen;
1096
1097
for (i = 0; i < obj->nr_programs; ++i) {
1098
int should_load = false;
1099
int use_cnt = 0;
1100
1101
prog = &obj->programs[i];
1102
if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
1103
continue;
1104
1105
for (j = 0; j < obj->nr_maps; ++j) {
1106
const struct btf_type *type;
1107
1108
map = &obj->maps[j];
1109
if (!bpf_map__is_struct_ops(map))
1110
continue;
1111
1112
type = btf__type_by_id(obj->btf, map->st_ops->type_id);
1113
vlen = btf_vlen(type);
1114
for (k = 0; k < vlen; ++k) {
1115
slot_prog = map->st_ops->progs[k];
1116
if (prog != slot_prog)
1117
continue;
1118
1119
use_cnt++;
1120
if (map->autocreate)
1121
should_load = true;
1122
}
1123
}
1124
if (use_cnt)
1125
prog->autoload = should_load;
1126
}
1127
1128
return 0;
1129
}
1130
1131
/* Init the map's fields that depend on kern_btf */
1132
static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
1133
{
1134
const struct btf_member *member, *kern_member, *kern_data_member;
1135
const struct btf_type *type, *kern_type, *kern_vtype;
1136
__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
1137
struct bpf_object *obj = map->obj;
1138
const struct btf *btf = obj->btf;
1139
struct bpf_struct_ops *st_ops;
1140
const struct btf *kern_btf;
1141
struct module_btf *mod_btf = NULL;
1142
void *data, *kern_data;
1143
const char *tname;
1144
int err;
1145
1146
st_ops = map->st_ops;
1147
type = btf__type_by_id(btf, st_ops->type_id);
1148
tname = btf__name_by_offset(btf, type->name_off);
1149
err = find_struct_ops_kern_types(obj, tname, &mod_btf,
1150
&kern_type, &kern_type_id,
1151
&kern_vtype, &kern_vtype_id,
1152
&kern_data_member);
1153
if (err)
1154
return err;
1155
1156
kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;
1157
1158
pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
1159
map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
1160
1161
map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
1162
map->def.value_size = kern_vtype->size;
1163
map->btf_vmlinux_value_type_id = kern_vtype_id;
1164
1165
st_ops->kern_vdata = calloc(1, kern_vtype->size);
1166
if (!st_ops->kern_vdata)
1167
return -ENOMEM;
1168
1169
data = st_ops->data;
1170
kern_data_off = kern_data_member->offset / 8;
1171
kern_data = st_ops->kern_vdata + kern_data_off;
1172
1173
member = btf_members(type);
1174
for (i = 0; i < btf_vlen(type); i++, member++) {
1175
const struct btf_type *mtype, *kern_mtype;
1176
__u32 mtype_id, kern_mtype_id;
1177
void *mdata, *kern_mdata;
1178
struct bpf_program *prog;
1179
__s64 msize, kern_msize;
1180
__u32 moff, kern_moff;
1181
__u32 kern_member_idx;
1182
const char *mname;
1183
1184
mname = btf__name_by_offset(btf, member->name_off);
1185
moff = member->offset / 8;
1186
mdata = data + moff;
1187
msize = btf__resolve_size(btf, member->type);
1188
if (msize < 0) {
1189
pr_warn("struct_ops init_kern %s: failed to resolve the size of member %s\n",
1190
map->name, mname);
1191
return msize;
1192
}
1193
1194
kern_member = find_member_by_name(kern_btf, kern_type, mname);
1195
if (!kern_member) {
1196
if (!libbpf_is_mem_zeroed(mdata, msize)) {
1197
pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
1198
map->name, mname);
1199
return -ENOTSUP;
1200
}
1201
1202
if (st_ops->progs[i]) {
1203
/* If we had a declaratively set struct_ops callback, we need to
 * force its autoload to false, because it doesn't have
 * a chance of succeeding from the POV of the current struct_ops map.
 * If this program is still referenced somewhere else, though,
 * then bpf_object_adjust_struct_ops_autoload() will update its
 * autoload accordingly.
 */
1210
st_ops->progs[i]->autoload = false;
1211
st_ops->progs[i] = NULL;
1212
}
1213
1214
/* Skip all-zero/NULL fields if they are not present in the kernel BTF */
1215
pr_info("struct_ops %s: member %s not found in kernel, skipping it as it's set to zero\n",
1216
map->name, mname);
1217
continue;
1218
}
1219
1220
kern_member_idx = kern_member - btf_members(kern_type);
1221
if (btf_member_bitfield_size(type, i) ||
1222
btf_member_bitfield_size(kern_type, kern_member_idx)) {
1223
pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
1224
map->name, mname);
1225
return -ENOTSUP;
1226
}
1227
1228
kern_moff = kern_member->offset / 8;
1229
kern_mdata = kern_data + kern_moff;
1230
1231
mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
1232
kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
1233
&kern_mtype_id);
1234
if (BTF_INFO_KIND(mtype->info) !=
1235
BTF_INFO_KIND(kern_mtype->info)) {
1236
pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
1237
map->name, mname, BTF_INFO_KIND(mtype->info),
1238
BTF_INFO_KIND(kern_mtype->info));
1239
return -ENOTSUP;
1240
}
1241
1242
if (btf_is_ptr(mtype)) {
1243
prog = *(void **)mdata;
1244
/* just like for the !kern_member case above, reset the declaratively
 * set (at compile time) program's autoload to false
 * if the user replaced it with another program or NULL
 */
1248
if (st_ops->progs[i] && st_ops->progs[i] != prog)
1249
st_ops->progs[i]->autoload = false;
1250
1251
/* Update the value from the shadow type */
1252
st_ops->progs[i] = prog;
1253
if (!prog)
1254
continue;
1255
1256
if (!is_valid_st_ops_program(obj, prog)) {
1257
pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
1258
map->name, mname);
1259
return -ENOTSUP;
1260
}
1261
1262
kern_mtype = skip_mods_and_typedefs(kern_btf,
1263
kern_mtype->type,
1264
&kern_mtype_id);
1265
1266
/* mtype->type must be a func_proto which was
1267
* guaranteed in bpf_object__collect_st_ops_relos(),
1268
* so only check kern_mtype for func_proto here.
1269
*/
1270
if (!btf_is_func_proto(kern_mtype)) {
1271
pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
1272
map->name, mname);
1273
return -ENOTSUP;
1274
}
1275
1276
if (mod_btf)
1277
prog->attach_btf_obj_fd = mod_btf->fd;
1278
1279
/* if we haven't yet processed this BPF program, record proper
1280
* attach_btf_id and member_idx
1281
*/
1282
if (!prog->attach_btf_id) {
1283
prog->attach_btf_id = kern_type_id;
1284
prog->expected_attach_type = kern_member_idx;
1285
}
1286
1287
/* struct_ops BPF prog can be re-used between multiple
1288
* .struct_ops & .struct_ops.link as long as it's the
1289
* same struct_ops struct definition and the same
1290
* function pointer field
1291
*/
1292
if (prog->attach_btf_id != kern_type_id) {
1293
pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
1294
map->name, mname, prog->name, prog->sec_name, prog->type,
1295
prog->attach_btf_id, kern_type_id);
1296
return -EINVAL;
1297
}
1298
if (prog->expected_attach_type != kern_member_idx) {
1299
pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
1300
map->name, mname, prog->name, prog->sec_name, prog->type,
1301
prog->expected_attach_type, kern_member_idx);
1302
return -EINVAL;
1303
}
1304
1305
st_ops->kern_func_off[i] = kern_data_off + kern_moff;
1306
1307
pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
1308
map->name, mname, prog->name, moff,
1309
kern_moff);
1310
1311
continue;
1312
}
1313
1314
kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
1315
if (kern_msize < 0 || msize != kern_msize) {
1316
pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
1317
map->name, mname, (ssize_t)msize,
1318
(ssize_t)kern_msize);
1319
return -ENOTSUP;
1320
}
1321
1322
pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
1323
map->name, mname, (unsigned int)msize,
1324
moff, kern_moff);
1325
memcpy(kern_mdata, mdata, msize);
1326
}
1327
1328
return 0;
1329
}
1330
1331
static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
1332
{
1333
struct bpf_map *map;
1334
size_t i;
1335
int err;
1336
1337
for (i = 0; i < obj->nr_maps; i++) {
1338
map = &obj->maps[i];
1339
1340
if (!bpf_map__is_struct_ops(map))
1341
continue;
1342
1343
if (!map->autocreate)
1344
continue;
1345
1346
err = bpf_map__init_kern_struct_ops(map);
1347
if (err)
1348
return err;
1349
}
1350
1351
return 0;
1352
}
1353
1354
static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
1355
int shndx, Elf_Data *data)
1356
{
1357
const struct btf_type *type, *datasec;
1358
const struct btf_var_secinfo *vsi;
1359
struct bpf_struct_ops *st_ops;
1360
const char *tname, *var_name;
1361
__s32 type_id, datasec_id;
1362
const struct btf *btf;
1363
struct bpf_map *map;
1364
__u32 i;
1365
1366
if (shndx == -1)
1367
return 0;
1368
1369
btf = obj->btf;
1370
datasec_id = btf__find_by_name_kind(btf, sec_name,
1371
BTF_KIND_DATASEC);
1372
if (datasec_id < 0) {
1373
pr_warn("struct_ops init: DATASEC %s not found\n",
1374
sec_name);
1375
return -EINVAL;
1376
}
1377
1378
datasec = btf__type_by_id(btf, datasec_id);
1379
vsi = btf_var_secinfos(datasec);
1380
for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
1381
type = btf__type_by_id(obj->btf, vsi->type);
1382
var_name = btf__name_by_offset(obj->btf, type->name_off);
1383
1384
type_id = btf__resolve_type(obj->btf, vsi->type);
1385
if (type_id < 0) {
1386
pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
1387
vsi->type, sec_name);
1388
return -EINVAL;
1389
}
1390
1391
type = btf__type_by_id(obj->btf, type_id);
1392
tname = btf__name_by_offset(obj->btf, type->name_off);
1393
if (!tname[0]) {
1394
pr_warn("struct_ops init: anonymous type is not supported\n");
1395
return -ENOTSUP;
1396
}
1397
if (!btf_is_struct(type)) {
1398
pr_warn("struct_ops init: %s is not a struct\n", tname);
1399
return -EINVAL;
1400
}
1401
1402
map = bpf_object__add_map(obj);
1403
if (IS_ERR(map))
1404
return PTR_ERR(map);
1405
1406
map->sec_idx = shndx;
1407
map->sec_offset = vsi->offset;
1408
map->name = strdup(var_name);
1409
if (!map->name)
1410
return -ENOMEM;
1411
map->btf_value_type_id = type_id;
1412
1413
/* Follow same convention as for programs autoload:
1414
* SEC("?.struct_ops") means map is not created by default.
1415
*/
1416
if (sec_name[0] == '?') {
1417
map->autocreate = false;
1418
/* from now on forget there was ? in section name */
1419
sec_name++;
1420
}
1421
1422
map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
1423
map->def.key_size = sizeof(int);
1424
map->def.value_size = type->size;
1425
map->def.max_entries = 1;
1426
map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0;
1427
map->autoattach = true;
1428
1429
map->st_ops = calloc(1, sizeof(*map->st_ops));
1430
if (!map->st_ops)
1431
return -ENOMEM;
1432
st_ops = map->st_ops;
1433
st_ops->data = malloc(type->size);
1434
st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
1435
st_ops->kern_func_off = malloc(btf_vlen(type) *
1436
sizeof(*st_ops->kern_func_off));
1437
if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
1438
return -ENOMEM;
1439
1440
if (vsi->offset + type->size > data->d_size) {
1441
pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
1442
var_name, sec_name);
1443
return -EINVAL;
1444
}
1445
1446
memcpy(st_ops->data,
1447
data->d_buf + vsi->offset,
1448
type->size);
1449
st_ops->type_id = type_id;
1450
1451
pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
1452
tname, type_id, var_name, vsi->offset);
1453
}
1454
1455
return 0;
1456
}
1457
1458
static int bpf_object_init_struct_ops(struct bpf_object *obj)
1459
{
1460
const char *sec_name;
1461
int sec_idx, err;
1462
1463
for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) {
1464
struct elf_sec_desc *desc = &obj->efile.secs[sec_idx];
1465
1466
if (desc->sec_type != SEC_ST_OPS)
1467
continue;
1468
1469
sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1470
if (!sec_name)
1471
return -LIBBPF_ERRNO__FORMAT;
1472
1473
err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data);
1474
if (err)
1475
return err;
1476
}
1477
1478
return 0;
1479
}
1480
1481
static struct bpf_object *bpf_object__new(const char *path,
1482
const void *obj_buf,
1483
size_t obj_buf_sz,
1484
const char *obj_name)
1485
{
1486
struct bpf_object *obj;
1487
char *end;
1488
1489
obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
1490
if (!obj) {
1491
pr_warn("alloc memory failed for %s\n", path);
1492
return ERR_PTR(-ENOMEM);
1493
}
1494
1495
strcpy(obj->path, path);
1496
if (obj_name) {
1497
libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
1498
} else {
1499
/* Using basename() GNU version which doesn't modify arg. */
1500
libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
1501
end = strchr(obj->name, '.');
1502
if (end)
1503
*end = 0;
1504
}
1505
1506
obj->efile.fd = -1;
1507
/*
 * The caller of this function should also call
 * bpf_object__elf_finish() after data collection to return
 * obj_buf to the user. If not, we would have to duplicate the
 * buffer to avoid the user freeing it before ELF processing finishes.
 */
1513
obj->efile.obj_buf = obj_buf;
1514
obj->efile.obj_buf_sz = obj_buf_sz;
1515
obj->efile.btf_maps_shndx = -1;
1516
obj->kconfig_map_idx = -1;
1517
obj->arena_map_idx = -1;
1518
1519
obj->kern_version = get_kernel_version();
1520
obj->state = OBJ_OPEN;
1521
1522
return obj;
1523
}
1524
1525
static void bpf_object__elf_finish(struct bpf_object *obj)
1526
{
1527
if (!obj->efile.elf)
1528
return;
1529
1530
elf_end(obj->efile.elf);
1531
obj->efile.elf = NULL;
1532
obj->efile.ehdr = NULL;
1533
obj->efile.symbols = NULL;
1534
obj->efile.arena_data = NULL;
1535
1536
zfree(&obj->efile.secs);
1537
obj->efile.sec_cnt = 0;
1538
zclose(obj->efile.fd);
1539
obj->efile.obj_buf = NULL;
1540
obj->efile.obj_buf_sz = 0;
1541
}
1542
1543
static int bpf_object__elf_init(struct bpf_object *obj)
1544
{
1545
Elf64_Ehdr *ehdr;
1546
int err = 0;
1547
Elf *elf;
1548
1549
if (obj->efile.elf) {
1550
pr_warn("elf: init internal error\n");
1551
return -LIBBPF_ERRNO__LIBELF;
1552
}
1553
1554
if (obj->efile.obj_buf_sz > 0) {
1555
/* obj_buf should have been validated by bpf_object__open_mem(). */
1556
elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
1557
} else {
1558
obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
1559
if (obj->efile.fd < 0) {
1560
err = -errno;
1561
pr_warn("elf: failed to open %s: %s\n", obj->path, errstr(err));
1562
return err;
1563
}
1564
1565
elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
1566
}
1567
1568
if (!elf) {
1569
pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
1570
err = -LIBBPF_ERRNO__LIBELF;
1571
goto errout;
1572
}
1573
1574
obj->efile.elf = elf;
1575
1576
if (elf_kind(elf) != ELF_K_ELF) {
1577
err = -LIBBPF_ERRNO__FORMAT;
1578
pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
1579
goto errout;
1580
}
1581
1582
if (gelf_getclass(elf) != ELFCLASS64) {
1583
err = -LIBBPF_ERRNO__FORMAT;
1584
pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
1585
goto errout;
1586
}
1587
1588
obj->efile.ehdr = ehdr = elf64_getehdr(elf);
1589
if (!obj->efile.ehdr) {
1590
pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
1591
err = -LIBBPF_ERRNO__FORMAT;
1592
goto errout;
1593
}
1594
1595
/* Validate ELF object endianness... */
1596
if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
1597
ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
1598
err = -LIBBPF_ERRNO__ENDIAN;
1599
pr_warn("elf: '%s' has unknown byte order\n", obj->path);
1600
goto errout;
1601
}
1602
/* and save after bpf_object_open() frees ELF data */
1603
obj->byteorder = ehdr->e_ident[EI_DATA];
1604
1605
if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
1606
pr_warn("elf: failed to get section names section index for %s: %s\n",
1607
obj->path, elf_errmsg(-1));
1608
err = -LIBBPF_ERRNO__FORMAT;
1609
goto errout;
1610
}
1611
1612
/* ELF is corrupted/truncated, avoid calling elf_strptr. */
1613
if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
1614
pr_warn("elf: failed to get section names strings from %s: %s\n",
1615
obj->path, elf_errmsg(-1));
1616
err = -LIBBPF_ERRNO__FORMAT;
1617
goto errout;
1618
}
1619
1620
/* Old LLVM set e_machine to EM_NONE */
1621
if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
1622
pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
1623
err = -LIBBPF_ERRNO__FORMAT;
1624
goto errout;
1625
}
1626
1627
return 0;
1628
errout:
1629
bpf_object__elf_finish(obj);
1630
return err;
1631
}
1632
1633
static bool is_native_endianness(struct bpf_object *obj)
1634
{
1635
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1636
return obj->byteorder == ELFDATA2LSB;
1637
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1638
return obj->byteorder == ELFDATA2MSB;
1639
#else
1640
# error "Unrecognized __BYTE_ORDER__"
1641
#endif
1642
}
1643
1644
static int
1645
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
1646
{
1647
if (!data) {
1648
pr_warn("invalid license section in %s\n", obj->path);
1649
return -LIBBPF_ERRNO__FORMAT;
1650
}
1651
/* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
1652
* go over allowed ELF data section buffer
1653
*/
1654
libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
1655
pr_debug("license of %s is %s\n", obj->path, obj->license);
1656
return 0;
1657
}
1658
1659
static int
1660
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
1661
{
1662
__u32 kver;
1663
1664
if (!data || size != sizeof(kver)) {
1665
pr_warn("invalid kver section in %s\n", obj->path);
1666
return -LIBBPF_ERRNO__FORMAT;
1667
}
1668
memcpy(&kver, data, sizeof(kver));
1669
obj->kern_version = kver;
1670
pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
1671
return 0;
1672
}
1673
1674
static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
1675
{
1676
if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1677
type == BPF_MAP_TYPE_HASH_OF_MAPS)
1678
return true;
1679
return false;
1680
}
1681
1682
static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
1683
{
1684
Elf_Data *data;
1685
Elf_Scn *scn;
1686
1687
if (!name)
1688
return -EINVAL;
1689
1690
scn = elf_sec_by_name(obj, name);
1691
data = elf_sec_data(obj, scn);
1692
if (data) {
1693
*size = data->d_size;
1694
return 0; /* found it */
1695
}
1696
1697
return -ENOENT;
1698
}
1699
1700
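/* Find a global or weak OBJECT symbol with the given name in the
 * object's ELF symbol table.
 */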
static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
1701
{
1702
Elf_Data *symbols = obj->efile.symbols;
1703
const char *sname;
1704
size_t si;
1705
1706
for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
1707
Elf64_Sym *sym = elf_sym_by_idx(obj, si);
1708
1709
if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
1710
continue;
1711
1712
if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
1713
ELF64_ST_BIND(sym->st_info) != STB_WEAK)
1714
continue;
1715
1716
sname = elf_sym_str(obj, sym->st_name);
1717
if (!sname) {
1718
pr_warn("failed to get sym name string for var %s\n", name);
1719
return ERR_PTR(-EIO);
1720
}
1721
if (strcmp(name, sname) == 0)
1722
return sym;
1723
}
1724
1725
return ERR_PTR(-ENOENT);
1726
}
1727
1728
#ifndef MFD_CLOEXEC
1729
#define MFD_CLOEXEC 0x0001U
1730
#endif
1731
#ifndef MFD_NOEXEC_SEAL
1732
#define MFD_NOEXEC_SEAL 0x0008U
1733
#endif
1734
1735
static int create_placeholder_fd(void)
1736
{
1737
unsigned int flags = MFD_CLOEXEC | MFD_NOEXEC_SEAL;
1738
const char *name = "libbpf-placeholder-fd";
1739
int fd;
1740
1741
fd = ensure_good_fd(sys_memfd_create(name, flags));
1742
if (fd >= 0)
1743
return fd;
1744
else if (errno != EINVAL)
1745
return -errno;
1746
1747
/* Possibly running on kernel without MFD_NOEXEC_SEAL */
1748
fd = ensure_good_fd(sys_memfd_create(name, flags & ~MFD_NOEXEC_SEAL));
1749
if (fd < 0)
1750
return -errno;
1751
return fd;
1752
}
1753
1754
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1755
{
1756
struct bpf_map *map;
1757
int err;
1758
1759
err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
1760
sizeof(*obj->maps), obj->nr_maps + 1);
1761
if (err)
1762
return ERR_PTR(err);
1763
1764
map = &obj->maps[obj->nr_maps++];
1765
map->obj = obj;
1766
/* Preallocate map FD without actually creating BPF map just yet.
1767
* These map FD "placeholders" will be reused later without changing
1768
* FD value when map is actually created in the kernel.
1769
*
1770
* This is useful to be able to perform BPF program relocations
1771
* without having to create BPF maps before that step. This allows us
1772
* to finalize and load BTF very late in BPF object's loading phase,
1773
* right before BPF maps have to be created and BPF programs have to
1774
* be loaded. By having these map FD placeholders we can perform all
1775
* the sanitizations, relocations, and any other adjustments before we
1776
* start creating actual BPF kernel objects (BTF, maps, progs).
1777
*/
1778
map->fd = create_placeholder_fd();
1779
if (map->fd < 0)
1780
return ERR_PTR(map->fd);
1781
map->inner_map_fd = -1;
1782
map->autocreate = true;
1783
1784
return map;
1785
}
1786
1787
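/* mmap() size of an ARRAY map: value size rounded up to 8 bytes,
 * multiplied by max_entries, then rounded up to the page size.
 */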
static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
1788
{
1789
const long page_sz = sysconf(_SC_PAGE_SIZE);
1790
size_t map_sz;
1791
1792
map_sz = (size_t)roundup(value_sz, 8) * max_entries;
1793
map_sz = roundup(map_sz, page_sz);
1794
return map_sz;
1795
}
1796
1797
static size_t bpf_map_mmap_sz(const struct bpf_map *map)
1798
{
1799
const long page_sz = sysconf(_SC_PAGE_SIZE);
1800
1801
switch (map->def.type) {
1802
case BPF_MAP_TYPE_ARRAY:
1803
return array_map_mmap_sz(map->def.value_size, map->def.max_entries);
1804
case BPF_MAP_TYPE_ARENA:
1805
return page_sz * map->def.max_entries;
1806
default:
1807
return 0; /* not supported */
1808
}
1809
}
1810
1811
static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
1812
{
1813
void *mmaped;
1814
1815
if (!map->mmaped)
1816
return -EINVAL;
1817
1818
if (old_sz == new_sz)
1819
return 0;
1820
1821
mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1822
if (mmaped == MAP_FAILED)
1823
return -errno;
1824
1825
memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
1826
munmap(map->mmaped, old_sz);
1827
map->mmaped = mmaped;
1828
return 0;
1829
}
1830
1831
static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1832
{
1833
char map_name[BPF_OBJ_NAME_LEN], *p;
1834
int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1835
1836
/* This is one of the more confusing parts of libbpf for various
1837
* reasons, some of which are historical. The original idea for naming
1838
* internal names was to include as much of BPF object name prefix as
1839
* possible, so that it can be distinguished from similar internal
1840
* maps of a different BPF object.
1841
* As an example, let's say we have bpf_object named 'my_object_name'
1842
* and internal map corresponding to '.rodata' ELF section. The final
1843
* map name advertised to user and to the kernel will be
1844
* 'my_objec.rodata', taking first 8 characters of object name and
1845
* entire 7 characters of '.rodata'.
1846
* Somewhat confusingly, if internal map ELF section name is shorter
1847
* than 7 characters, e.g., '.bss', we still reserve 7 characters
1848
* for the suffix, even though we only have 4 actual characters, and
1849
* resulting map will be called 'my_objec.bss', not even using all 15
1850
* characters allowed by the kernel. Oh well, at least the truncated
1851
* object name is somewhat consistent in this case. But if the map
1852
* name is '.kconfig', we'll still have entirety of '.kconfig' added
1853
* (8 chars) and thus will be left with only first 7 characters of the
1854
* object name ('my_obje'). Happy guessing, user, that the final map
1855
* name will be "my_obje.kconfig".
1856
* Now, with libbpf starting to support arbitrarily named .rodata.*
1857
* and .data.* data sections, it's possible that ELF section name is
1858
* longer than allowed 15 chars, so we now need to be careful to take
1859
* only up to 15 first characters of ELF name, taking no BPF object
1860
* name characters at all. So '.rodata.abracadabra' will result in
1861
* '.rodata.abracad' kernel and user-visible name.
1862
* We need to keep this convoluted logic intact for .data, .bss and
1863
* .rodata maps, but for new custom .data.custom and .rodata.custom
1864
* maps we use their ELF names as is, not prepending bpf_object name
1865
* in front. We still need to truncate them to 15 characters for the
1866
* kernel. Full name can be recovered for such maps by using DATASEC
1867
* BTF type associated with such map's value type, though.
1868
*/
1869
if (sfx_len >= BPF_OBJ_NAME_LEN)
1870
sfx_len = BPF_OBJ_NAME_LEN - 1;
1871
1872
/* if there are two or more dots in map name, it's a custom dot map */
1873
if (strchr(real_name + 1, '.') != NULL)
1874
pfx_len = 0;
1875
else
1876
pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1877
1878
snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1879
sfx_len, real_name);
1880
1881
/* sanitize map name to characters allowed by kernel */
1882
for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1883
if (!isalnum(*p) && *p != '_' && *p != '.')
1884
*p = '_';
1885
1886
return strdup(map_name);
1887
}
1888
1889
static int
1890
map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
1891
1892
/* Internal BPF map is mmap()'able only if at least one of the corresponding
* DATASEC's VARs is to be exposed through BPF skeleton. I.e., it's a GLOBAL
* variable and it's not marked as __hidden (which turns it into, effectively,
* a STATIC variable).
*/
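/* For example (user-side BPF C, illustrative only; __hidden comes from
* bpf_helpers.h):
*
*	int pkt_count;          exposed via skeleton, keeps map mmap()'able
*	__hidden int scratch;   effectively static, doesn't force mmap()'ing
*/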
1897
static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
1898
{
1899
const struct btf_type *t, *vt;
1900
struct btf_var_secinfo *vsi;
1901
int i, n;
1902
1903
if (!map->btf_value_type_id)
1904
return false;
1905
1906
t = btf__type_by_id(obj->btf, map->btf_value_type_id);
1907
if (!btf_is_datasec(t))
1908
return false;
1909
1910
vsi = btf_var_secinfos(t);
1911
for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
1912
vt = btf__type_by_id(obj->btf, vsi->type);
1913
if (!btf_is_var(vt))
1914
continue;
1915
1916
if (btf_var(vt)->linkage != BTF_VAR_STATIC)
1917
return true;
1918
}
1919
1920
return false;
1921
}
1922
1923
static int
1924
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1925
const char *real_name, int sec_idx, void *data, size_t data_sz)
1926
{
1927
struct bpf_map_def *def;
1928
struct bpf_map *map;
1929
size_t mmap_sz;
1930
int err;
1931
1932
map = bpf_object__add_map(obj);
1933
if (IS_ERR(map))
1934
return PTR_ERR(map);
1935
1936
map->libbpf_type = type;
1937
map->sec_idx = sec_idx;
1938
map->sec_offset = 0;
1939
map->real_name = strdup(real_name);
1940
map->name = internal_map_name(obj, real_name);
1941
if (!map->real_name || !map->name) {
1942
zfree(&map->real_name);
1943
zfree(&map->name);
1944
return -ENOMEM;
1945
}
1946
1947
def = &map->def;
1948
def->type = BPF_MAP_TYPE_ARRAY;
1949
def->key_size = sizeof(int);
1950
def->value_size = data_sz;
1951
def->max_entries = 1;
1952
def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1953
? BPF_F_RDONLY_PROG : 0;
1954
1955
/* failures are fine because of maps like .rodata.str1.1 */
1956
(void) map_fill_btf_type_info(obj, map);
1957
1958
if (map_is_mmapable(obj, map))
1959
def->map_flags |= BPF_F_MMAPABLE;
1960
1961
pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1962
map->name, map->sec_idx, map->sec_offset, def->map_flags);
1963
1964
mmap_sz = bpf_map_mmap_sz(map);
1965
map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
1966
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1967
if (map->mmaped == MAP_FAILED) {
1968
err = -errno;
1969
map->mmaped = NULL;
1970
pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err));
1971
zfree(&map->real_name);
1972
zfree(&map->name);
1973
return err;
1974
}
1975
1976
if (data)
1977
memcpy(map->mmaped, data, data_sz);
1978
1979
pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1980
return 0;
1981
}
1982
1983
static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1984
{
1985
struct elf_sec_desc *sec_desc;
1986
const char *sec_name;
1987
int err = 0, sec_idx;
1988
1989
/*
1990
* Populate obj->maps with libbpf internal maps.
1991
*/
1992
for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
1993
sec_desc = &obj->efile.secs[sec_idx];
1994
1995
/* Skip recognized sections with size 0. */
1996
if (!sec_desc->data || sec_desc->data->d_size == 0)
1997
continue;
1998
1999
switch (sec_desc->sec_type) {
2000
case SEC_DATA:
2001
sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
2002
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
2003
sec_name, sec_idx,
2004
sec_desc->data->d_buf,
2005
sec_desc->data->d_size);
2006
break;
2007
case SEC_RODATA:
2008
obj->has_rodata = true;
2009
sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
2010
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
2011
sec_name, sec_idx,
2012
sec_desc->data->d_buf,
2013
sec_desc->data->d_size);
2014
break;
2015
case SEC_BSS:
2016
sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
2017
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
2018
sec_name, sec_idx,
2019
NULL,
2020
sec_desc->data->d_size);
2021
break;
2022
default:
2023
/* skip */
2024
break;
2025
}
2026
if (err)
2027
return err;
2028
}
2029
return 0;
2030
}
2031
2032
2033
static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
2034
const void *name)
2035
{
2036
int i;
2037
2038
for (i = 0; i < obj->nr_extern; i++) {
2039
if (strcmp(obj->externs[i].name, name) == 0)
2040
return &obj->externs[i];
2041
}
2042
return NULL;
2043
}
2044
2045
static struct extern_desc *find_extern_by_name_with_len(const struct bpf_object *obj,
2046
const void *name, int len)
2047
{
2048
const char *ext_name;
2049
int i;
2050
2051
for (i = 0; i < obj->nr_extern; i++) {
2052
ext_name = obj->externs[i].name;
2053
if (strlen(ext_name) == len && strncmp(ext_name, name, len) == 0)
2054
return &obj->externs[i];
2055
}
2056
return NULL;
2057
}
2058
2059
static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
2060
char value)
2061
{
2062
switch (ext->kcfg.type) {
2063
case KCFG_BOOL:
2064
if (value == 'm') {
2065
pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
2066
ext->name, value);
2067
return -EINVAL;
2068
}
2069
*(bool *)ext_val = value == 'y' ? true : false;
2070
break;
2071
case KCFG_TRISTATE:
2072
if (value == 'y')
2073
*(enum libbpf_tristate *)ext_val = TRI_YES;
2074
else if (value == 'm')
2075
*(enum libbpf_tristate *)ext_val = TRI_MODULE;
2076
else /* value == 'n' */
2077
*(enum libbpf_tristate *)ext_val = TRI_NO;
2078
break;
2079
case KCFG_CHAR:
2080
*(char *)ext_val = value;
2081
break;
2082
case KCFG_UNKNOWN:
2083
case KCFG_INT:
2084
case KCFG_CHAR_ARR:
2085
default:
2086
pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
2087
ext->name, value);
2088
return -EINVAL;
2089
}
2090
ext->is_set = true;
2091
return 0;
2092
}
2093
2094
static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
2095
const char *value)
2096
{
2097
size_t len;
2098
2099
if (ext->kcfg.type != KCFG_CHAR_ARR) {
2100
pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
2101
ext->name, value);
2102
return -EINVAL;
2103
}
2104
2105
len = strlen(value);
2106
if (len < 2 || value[len - 1] != '"') {
2107
pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
2108
ext->name, value);
2109
return -EINVAL;
2110
}
2111
2112
/* strip quotes */
2113
len -= 2;
2114
if (len >= ext->kcfg.sz) {
2115
pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
2116
ext->name, value, len, ext->kcfg.sz - 1);
2117
len = ext->kcfg.sz - 1;
2118
}
2119
memcpy(ext_val, value + 1, len);
2120
ext_val[len] = '\0';
2121
ext->is_set = true;
2122
return 0;
2123
}
2124
2125
static int parse_u64(const char *value, __u64 *res)
2126
{
2127
char *value_end;
2128
int err;
2129
2130
errno = 0;
2131
*res = strtoull(value, &value_end, 0);
2132
if (errno) {
2133
err = -errno;
2134
pr_warn("failed to parse '%s': %s\n", value, errstr(err));
2135
return err;
2136
}
2137
if (*value_end) {
2138
pr_warn("failed to parse '%s' as integer completely\n", value);
2139
return -EINVAL;
2140
}
2141
return 0;
2142
}
2143
2144
static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
2145
{
2146
int bit_sz = ext->kcfg.sz * 8;
2147
2148
if (ext->kcfg.sz == 8)
2149
return true;
2150
2151
/* Validate that value stored in u64 fits in integer of `ext->kcfg.sz`
* bytes without any loss of information. If the target integer
2153
* is signed, we rely on the following limits of integer type of
2154
* Y bits and subsequent transformation:
2155
*
2156
* -2^(Y-1) <= X <= 2^(Y-1) - 1
2157
* 0 <= X + 2^(Y-1) <= 2^Y - 1
2158
* 0 <= X + 2^(Y-1) < 2^Y
2159
*
2160
* For unsigned target integer, check that all the (64 - Y) bits are
2161
* zero.
2162
*/
2163
if (ext->kcfg.is_signed)
2164
return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
2165
else
2166
return (v >> bit_sz) == 0;
2167
}
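/* Worked example (illustrative): for a signed 2-byte kcfg extern
* (bit_sz = 16), v = (__u64)-1 becomes 0x7fff after adding 2^15
* (mod 2^64), which is < 2^16, so -1 is accepted; v = 40000 becomes
* 72768, which is >= 2^16, so it's rejected, as 40000 doesn't fit
* into a signed 16-bit integer.
*/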
2168
2169
static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
2170
__u64 value)
2171
{
2172
if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
2173
ext->kcfg.type != KCFG_BOOL) {
2174
pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
2175
ext->name, (unsigned long long)value);
2176
return -EINVAL;
2177
}
2178
if (ext->kcfg.type == KCFG_BOOL && value > 1) {
2179
pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
2180
ext->name, (unsigned long long)value);
2181
return -EINVAL;
2182
2183
}
2184
if (!is_kcfg_value_in_range(ext, value)) {
2185
pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
2186
ext->name, (unsigned long long)value, ext->kcfg.sz);
2187
return -ERANGE;
2188
}
2189
switch (ext->kcfg.sz) {
2190
case 1:
2191
*(__u8 *)ext_val = value;
2192
break;
2193
case 2:
2194
*(__u16 *)ext_val = value;
2195
break;
2196
case 4:
2197
*(__u32 *)ext_val = value;
2198
break;
2199
case 8:
2200
*(__u64 *)ext_val = value;
2201
break;
2202
default:
2203
return -EINVAL;
2204
}
2205
ext->is_set = true;
2206
return 0;
2207
}
2208
2209
static int bpf_object__process_kconfig_line(struct bpf_object *obj,
2210
char *buf, void *data)
2211
{
2212
struct extern_desc *ext;
2213
char *sep, *value;
2214
int len, err = 0;
2215
void *ext_val;
2216
__u64 num;
2217
2218
if (!str_has_pfx(buf, "CONFIG_"))
2219
return 0;
2220
2221
sep = strchr(buf, '=');
2222
if (!sep) {
2223
pr_warn("failed to parse '%s': no separator\n", buf);
2224
return -EINVAL;
2225
}
2226
2227
/* Trim ending '\n' */
2228
len = strlen(buf);
2229
if (buf[len - 1] == '\n')
2230
buf[len - 1] = '\0';
2231
/* Split on '=' and ensure that a value is present. */
2232
*sep = '\0';
2233
if (!sep[1]) {
2234
*sep = '=';
2235
pr_warn("failed to parse '%s': no value\n", buf);
2236
return -EINVAL;
2237
}
2238
2239
ext = find_extern_by_name(obj, buf);
2240
if (!ext || ext->is_set)
2241
return 0;
2242
2243
ext_val = data + ext->kcfg.data_off;
2244
value = sep + 1;
2245
2246
switch (*value) {
2247
case 'y': case 'n': case 'm':
2248
err = set_kcfg_value_tri(ext, ext_val, *value);
2249
break;
2250
case '"':
2251
err = set_kcfg_value_str(ext, ext_val, value);
2252
break;
2253
default:
2254
/* assume integer */
2255
err = parse_u64(value, &num);
2256
if (err) {
2257
pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
2258
return err;
2259
}
2260
if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
2261
pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
2262
return -EINVAL;
2263
}
2264
err = set_kcfg_value_num(ext, ext_val, num);
2265
break;
2266
}
2267
if (err)
2268
return err;
2269
pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
2270
return 0;
2271
}
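/* Example input handled above (illustrative only):
*
*	CONFIG_BPF_SYSCALL=y
*	CONFIG_HZ=1000
*	CONFIG_DEFAULT_HOSTNAME="(none)"
*
* matched on the BPF side by externs such as:
*
*	extern bool CONFIG_BPF_SYSCALL __kconfig;
*	extern int CONFIG_HZ __kconfig;
*
* where __kconfig (from bpf_helpers.h) places them into the .kconfig
* section backed by the map created in bpf_object__init_kconfig_map().
*/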
2272
2273
static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
2274
{
2275
char buf[PATH_MAX];
2276
struct utsname uts;
2277
int len, err = 0;
2278
gzFile file;
2279
2280
uname(&uts);
2281
len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
2282
if (len < 0)
2283
return -EINVAL;
2284
else if (len >= PATH_MAX)
2285
return -ENAMETOOLONG;
2286
2287
/* gzopen also accepts uncompressed files. */
2288
file = gzopen(buf, "re");
2289
if (!file)
2290
file = gzopen("/proc/config.gz", "re");
2291
2292
if (!file) {
2293
pr_warn("failed to open system Kconfig\n");
2294
return -ENOENT;
2295
}
2296
2297
while (gzgets(file, buf, sizeof(buf))) {
2298
err = bpf_object__process_kconfig_line(obj, buf, data);
2299
if (err) {
2300
pr_warn("error parsing system Kconfig line '%s': %s\n",
2301
buf, errstr(err));
2302
goto out;
2303
}
2304
}
2305
2306
out:
2307
gzclose(file);
2308
return err;
2309
}
2310
2311
static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
2312
const char *config, void *data)
2313
{
2314
char buf[PATH_MAX];
2315
int err = 0;
2316
FILE *file;
2317
2318
file = fmemopen((void *)config, strlen(config), "r");
2319
if (!file) {
2320
err = -errno;
2321
pr_warn("failed to open in-memory Kconfig: %s\n", errstr(err));
2322
return err;
2323
}
2324
2325
while (fgets(buf, sizeof(buf), file)) {
2326
err = bpf_object__process_kconfig_line(obj, buf, data);
2327
if (err) {
2328
pr_warn("error parsing in-memory Kconfig line '%s': %s\n",
2329
buf, errstr(err));
2330
break;
2331
}
2332
}
2333
2334
fclose(file);
2335
return err;
2336
}
2337
2338
static int bpf_object__init_kconfig_map(struct bpf_object *obj)
2339
{
2340
struct extern_desc *last_ext = NULL, *ext;
2341
size_t map_sz;
2342
int i, err;
2343
2344
for (i = 0; i < obj->nr_extern; i++) {
2345
ext = &obj->externs[i];
2346
if (ext->type == EXT_KCFG)
2347
last_ext = ext;
2348
}
2349
2350
if (!last_ext)
2351
return 0;
2352
2353
map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
2354
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
2355
".kconfig", obj->efile.symbols_shndx,
2356
NULL, map_sz);
2357
if (err)
2358
return err;
2359
2360
obj->kconfig_map_idx = obj->nr_maps - 1;
2361
2362
return 0;
2363
}
2364
2365
const struct btf_type *
2366
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
2367
{
2368
const struct btf_type *t = btf__type_by_id(btf, id);
2369
2370
if (res_id)
2371
*res_id = id;
2372
2373
while (btf_is_mod(t) || btf_is_typedef(t)) {
2374
if (res_id)
2375
*res_id = t->type;
2376
t = btf__type_by_id(btf, t->type);
2377
}
2378
2379
return t;
2380
}
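/* E.g., given `typedef const volatile int cvi_t;` (illustrative), resolving
* a `cvi_t` type id through skip_mods_and_typedefs() peels off the TYPEDEF,
* CONST and VOLATILE wrappers and returns the underlying INT type, with
* *res_id left pointing at that INT's type id.
*/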
2381
2382
static const struct btf_type *
2383
resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
2384
{
2385
const struct btf_type *t;
2386
2387
t = skip_mods_and_typedefs(btf, id, NULL);
2388
if (!btf_is_ptr(t))
2389
return NULL;
2390
2391
t = skip_mods_and_typedefs(btf, t->type, res_id);
2392
2393
return btf_is_func_proto(t) ? t : NULL;
2394
}
2395
2396
static const char *__btf_kind_str(__u16 kind)
2397
{
2398
switch (kind) {
2399
case BTF_KIND_UNKN: return "void";
2400
case BTF_KIND_INT: return "int";
2401
case BTF_KIND_PTR: return "ptr";
2402
case BTF_KIND_ARRAY: return "array";
2403
case BTF_KIND_STRUCT: return "struct";
2404
case BTF_KIND_UNION: return "union";
2405
case BTF_KIND_ENUM: return "enum";
2406
case BTF_KIND_FWD: return "fwd";
2407
case BTF_KIND_TYPEDEF: return "typedef";
2408
case BTF_KIND_VOLATILE: return "volatile";
2409
case BTF_KIND_CONST: return "const";
2410
case BTF_KIND_RESTRICT: return "restrict";
2411
case BTF_KIND_FUNC: return "func";
2412
case BTF_KIND_FUNC_PROTO: return "func_proto";
2413
case BTF_KIND_VAR: return "var";
2414
case BTF_KIND_DATASEC: return "datasec";
2415
case BTF_KIND_FLOAT: return "float";
2416
case BTF_KIND_DECL_TAG: return "decl_tag";
2417
case BTF_KIND_TYPE_TAG: return "type_tag";
2418
case BTF_KIND_ENUM64: return "enum64";
2419
default: return "unknown";
2420
}
2421
}
2422
2423
const char *btf_kind_str(const struct btf_type *t)
2424
{
2425
return __btf_kind_str(btf_kind(t));
2426
}
2427
2428
/*
2429
* Fetch integer attribute of BTF map definition. Such attributes are
2430
* represented using a pointer to an array, in which dimensionality of array
2431
* encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2432
* encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2433
* type definition, while using only sizeof(void *) space in ELF data section.
2434
*/
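/* In user-facing BPF code this encoding is typically produced by the
* __uint()/__type() macros from bpf_helpers.h, e.g. (illustrative):
*
*	struct {
*		__uint(type, BPF_MAP_TYPE_ARRAY);
*		__uint(max_entries, 256);
*		__type(key, __u32);
*		__type(value, __u64);
*	} my_map SEC(".maps");
*
* where __uint(name, val) expands to `int (*name)[val]`, so the integer
* is recovered below from the pointed-to array's number of elements.
*/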
2435
static bool get_map_field_int(const char *map_name, const struct btf *btf,
2436
const struct btf_member *m, __u32 *res)
2437
{
2438
const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2439
const char *name = btf__name_by_offset(btf, m->name_off);
2440
const struct btf_array *arr_info;
2441
const struct btf_type *arr_t;
2442
2443
if (!btf_is_ptr(t)) {
2444
pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2445
map_name, name, btf_kind_str(t));
2446
return false;
2447
}
2448
2449
arr_t = btf__type_by_id(btf, t->type);
2450
if (!arr_t) {
2451
pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2452
map_name, name, t->type);
2453
return false;
2454
}
2455
if (!btf_is_array(arr_t)) {
2456
pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2457
map_name, name, btf_kind_str(arr_t));
2458
return false;
2459
}
2460
arr_info = btf_array(arr_t);
2461
*res = arr_info->nelems;
2462
return true;
2463
}
2464
2465
static bool get_map_field_long(const char *map_name, const struct btf *btf,
2466
const struct btf_member *m, __u64 *res)
2467
{
2468
const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2469
const char *name = btf__name_by_offset(btf, m->name_off);
2470
2471
if (btf_is_ptr(t)) {
2472
__u32 res32;
2473
bool ret;
2474
2475
ret = get_map_field_int(map_name, btf, m, &res32);
2476
if (ret)
2477
*res = (__u64)res32;
2478
return ret;
2479
}
2480
2481
if (!btf_is_enum(t) && !btf_is_enum64(t)) {
2482
pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n",
2483
map_name, name, btf_kind_str(t));
2484
return false;
2485
}
2486
2487
if (btf_vlen(t) != 1) {
2488
pr_warn("map '%s': attr '%s': invalid __ulong\n",
2489
map_name, name);
2490
return false;
2491
}
2492
2493
if (btf_is_enum(t)) {
2494
const struct btf_enum *e = btf_enum(t);
2495
2496
*res = e->val;
2497
} else {
2498
const struct btf_enum64 *e = btf_enum64(t);
2499
2500
*res = btf_enum64_value(e);
2501
}
2502
return true;
2503
}
2504
2505
static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
2506
{
2507
int len;
2508
2509
len = snprintf(buf, buf_sz, "%s/%s", path, name);
2510
if (len < 0)
2511
return -EINVAL;
2512
if (len >= buf_sz)
2513
return -ENAMETOOLONG;
2514
2515
return 0;
2516
}
2517
2518
static int build_map_pin_path(struct bpf_map *map, const char *path)
2519
{
2520
char buf[PATH_MAX];
2521
int err;
2522
2523
if (!path)
2524
path = BPF_FS_DEFAULT_PATH;
2525
2526
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
2527
if (err)
2528
return err;
2529
2530
return bpf_map__set_pin_path(map, buf);
2531
}
2532
2533
/* should match definition in bpf_helpers.h */
2534
enum libbpf_pin_type {
2535
LIBBPF_PIN_NONE,
2536
/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
2537
LIBBPF_PIN_BY_NAME,
2538
};
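/* A BTF-defined map requests pinning by name like this (illustrative):
*
*	struct {
*		__uint(type, BPF_MAP_TYPE_HASH);
*		__uint(max_entries, 1024);
*		__type(key, __u32);
*		__type(value, __u64);
*		__uint(pinning, LIBBPF_PIN_BY_NAME);
*	} pinned_map SEC(".maps");
*
* which makes build_map_pin_path() derive a pin path such as
* /sys/fs/bpf/pinned_map (or rooted at the pin_root_path open option).
*/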
2539
2540
int parse_btf_map_def(const char *map_name, struct btf *btf,
2541
const struct btf_type *def_t, bool strict,
2542
struct btf_map_def *map_def, struct btf_map_def *inner_def)
2543
{
2544
const struct btf_type *t;
2545
const struct btf_member *m;
2546
bool is_inner = inner_def == NULL;
2547
int vlen, i;
2548
2549
vlen = btf_vlen(def_t);
2550
m = btf_members(def_t);
2551
for (i = 0; i < vlen; i++, m++) {
2552
const char *name = btf__name_by_offset(btf, m->name_off);
2553
2554
if (!name) {
2555
pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2556
return -EINVAL;
2557
}
2558
if (strcmp(name, "type") == 0) {
2559
if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2560
return -EINVAL;
2561
map_def->parts |= MAP_DEF_MAP_TYPE;
2562
} else if (strcmp(name, "max_entries") == 0) {
2563
if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2564
return -EINVAL;
2565
map_def->parts |= MAP_DEF_MAX_ENTRIES;
2566
} else if (strcmp(name, "map_flags") == 0) {
2567
if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2568
return -EINVAL;
2569
map_def->parts |= MAP_DEF_MAP_FLAGS;
2570
} else if (strcmp(name, "numa_node") == 0) {
2571
if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2572
return -EINVAL;
2573
map_def->parts |= MAP_DEF_NUMA_NODE;
2574
} else if (strcmp(name, "key_size") == 0) {
2575
__u32 sz;
2576
2577
if (!get_map_field_int(map_name, btf, m, &sz))
2578
return -EINVAL;
2579
if (map_def->key_size && map_def->key_size != sz) {
2580
pr_warn("map '%s': conflicting key size %u != %u.\n",
2581
map_name, map_def->key_size, sz);
2582
return -EINVAL;
2583
}
2584
map_def->key_size = sz;
2585
map_def->parts |= MAP_DEF_KEY_SIZE;
2586
} else if (strcmp(name, "key") == 0) {
2587
__s64 sz;
2588
2589
t = btf__type_by_id(btf, m->type);
2590
if (!t) {
2591
pr_warn("map '%s': key type [%d] not found.\n",
2592
map_name, m->type);
2593
return -EINVAL;
2594
}
2595
if (!btf_is_ptr(t)) {
2596
pr_warn("map '%s': key spec is not PTR: %s.\n",
2597
map_name, btf_kind_str(t));
2598
return -EINVAL;
2599
}
2600
sz = btf__resolve_size(btf, t->type);
2601
if (sz < 0) {
2602
pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2603
map_name, t->type, (ssize_t)sz);
2604
return sz;
2605
}
2606
if (map_def->key_size && map_def->key_size != sz) {
2607
pr_warn("map '%s': conflicting key size %u != %zd.\n",
2608
map_name, map_def->key_size, (ssize_t)sz);
2609
return -EINVAL;
2610
}
2611
map_def->key_size = sz;
2612
map_def->key_type_id = t->type;
2613
map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2614
} else if (strcmp(name, "value_size") == 0) {
2615
__u32 sz;
2616
2617
if (!get_map_field_int(map_name, btf, m, &sz))
2618
return -EINVAL;
2619
if (map_def->value_size && map_def->value_size != sz) {
2620
pr_warn("map '%s': conflicting value size %u != %u.\n",
2621
map_name, map_def->value_size, sz);
2622
return -EINVAL;
2623
}
2624
map_def->value_size = sz;
2625
map_def->parts |= MAP_DEF_VALUE_SIZE;
2626
} else if (strcmp(name, "value") == 0) {
2627
__s64 sz;
2628
2629
t = btf__type_by_id(btf, m->type);
2630
if (!t) {
2631
pr_warn("map '%s': value type [%d] not found.\n",
2632
map_name, m->type);
2633
return -EINVAL;
2634
}
2635
if (!btf_is_ptr(t)) {
2636
pr_warn("map '%s': value spec is not PTR: %s.\n",
2637
map_name, btf_kind_str(t));
2638
return -EINVAL;
2639
}
2640
sz = btf__resolve_size(btf, t->type);
2641
if (sz < 0) {
2642
pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2643
map_name, t->type, (ssize_t)sz);
2644
return sz;
2645
}
2646
if (map_def->value_size && map_def->value_size != sz) {
2647
pr_warn("map '%s': conflicting value size %u != %zd.\n",
2648
map_name, map_def->value_size, (ssize_t)sz);
2649
return -EINVAL;
2650
}
2651
map_def->value_size = sz;
2652
map_def->value_type_id = t->type;
2653
map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2654
}
2655
else if (strcmp(name, "values") == 0) {
2656
bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
2657
bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
2658
const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
2659
char inner_map_name[128];
2660
int err;
2661
2662
if (is_inner) {
2663
pr_warn("map '%s': multi-level inner maps not supported.\n",
2664
map_name);
2665
return -ENOTSUP;
2666
}
2667
if (i != vlen - 1) {
2668
pr_warn("map '%s': '%s' member should be last.\n",
2669
map_name, name);
2670
return -EINVAL;
2671
}
2672
if (!is_map_in_map && !is_prog_array) {
2673
pr_warn("map '%s': should be map-in-map or prog-array.\n",
2674
map_name);
2675
return -ENOTSUP;
2676
}
2677
if (map_def->value_size && map_def->value_size != 4) {
2678
pr_warn("map '%s': conflicting value size %u != 4.\n",
2679
map_name, map_def->value_size);
2680
return -EINVAL;
2681
}
2682
map_def->value_size = 4;
2683
t = btf__type_by_id(btf, m->type);
2684
if (!t) {
2685
pr_warn("map '%s': %s type [%d] not found.\n",
2686
map_name, desc, m->type);
2687
return -EINVAL;
2688
}
2689
if (!btf_is_array(t) || btf_array(t)->nelems) {
2690
pr_warn("map '%s': %s spec is not a zero-sized array.\n",
2691
map_name, desc);
2692
return -EINVAL;
2693
}
2694
t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2695
if (!btf_is_ptr(t)) {
2696
pr_warn("map '%s': %s def is of unexpected kind %s.\n",
2697
map_name, desc, btf_kind_str(t));
2698
return -EINVAL;
2699
}
2700
t = skip_mods_and_typedefs(btf, t->type, NULL);
2701
if (is_prog_array) {
2702
if (!btf_is_func_proto(t)) {
2703
pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
2704
map_name, btf_kind_str(t));
2705
return -EINVAL;
2706
}
2707
continue;
2708
}
2709
if (!btf_is_struct(t)) {
2710
pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2711
map_name, btf_kind_str(t));
2712
return -EINVAL;
2713
}
2714
2715
snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2716
err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2717
if (err)
2718
return err;
2719
2720
map_def->parts |= MAP_DEF_INNER_MAP;
2721
} else if (strcmp(name, "pinning") == 0) {
2722
__u32 val;
2723
2724
if (is_inner) {
2725
pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2726
return -EINVAL;
2727
}
2728
if (!get_map_field_int(map_name, btf, m, &val))
2729
return -EINVAL;
2730
if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2731
pr_warn("map '%s': invalid pinning value %u.\n",
2732
map_name, val);
2733
return -EINVAL;
2734
}
2735
map_def->pinning = val;
2736
map_def->parts |= MAP_DEF_PINNING;
2737
} else if (strcmp(name, "map_extra") == 0) {
2738
__u64 map_extra;
2739
2740
if (!get_map_field_long(map_name, btf, m, &map_extra))
2741
return -EINVAL;
2742
map_def->map_extra = map_extra;
2743
map_def->parts |= MAP_DEF_MAP_EXTRA;
2744
} else {
2745
if (strict) {
2746
pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2747
return -ENOTSUP;
2748
}
2749
pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2750
}
2751
}
2752
2753
if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2754
pr_warn("map '%s': map type isn't specified.\n", map_name);
2755
return -EINVAL;
2756
}
2757
2758
return 0;
2759
}
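/* The "values" handling above corresponds to map-in-map declarations
* like the following (illustrative; __array() comes from bpf_helpers.h
* and expands to a zero-sized array of pointers):
*
*	struct {
*		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
*		__uint(max_entries, 8);
*		__type(key, int);
*		__array(values, struct {
*			__uint(type, BPF_MAP_TYPE_ARRAY);
*			__uint(max_entries, 1);
*			__type(key, int);
*			__type(value, int);
*		});
*	} outer_map SEC(".maps");
*
* The anonymous inner struct is parsed recursively into inner_def.
*/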
2760
2761
static size_t adjust_ringbuf_sz(size_t sz)
2762
{
2763
__u32 page_sz = sysconf(_SC_PAGE_SIZE);
2764
__u32 mul;
2765
2766
/* if user forgot to set any size, make sure they see error */
2767
if (sz == 0)
2768
return 0;
2769
/* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
* a power-of-2 multiple of kernel's page size. If user diligently
* satisfied these conditions, pass the size through.
*/
2773
if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2774
return sz;
2775
2776
/* Otherwise find closest (page_sz * power_of_2) product bigger than
2777
* user-set size to satisfy both user size request and kernel
2778
* requirements and substitute correct max_entries for map creation.
2779
*/
2780
for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2781
if (mul * page_sz > sz)
2782
return mul * page_sz;
2783
}
2784
2785
/* if it's impossible to satisfy the conditions (i.e., user size is
2786
* very close to UINT_MAX but is not a power-of-2 multiple of
2787
* page_size) then just return original size and let kernel reject it
2788
*/
2789
return sz;
2790
}
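/* Worked example (illustrative, assuming a 4096-byte page size): a
* requested ringbuf size of 300000 bytes is not a power-of-2 multiple
* of the page size, so the loop above bumps it to 128 * 4096 = 524288
* bytes, the smallest conforming size covering the request.
*/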
2791
2792
static bool map_is_ringbuf(const struct bpf_map *map)
2793
{
2794
return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2795
map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2796
}
2797
2798
static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2799
{
2800
map->def.type = def->map_type;
2801
map->def.key_size = def->key_size;
2802
map->def.value_size = def->value_size;
2803
map->def.max_entries = def->max_entries;
2804
map->def.map_flags = def->map_flags;
2805
map->map_extra = def->map_extra;
2806
2807
map->numa_node = def->numa_node;
2808
map->btf_key_type_id = def->key_type_id;
2809
map->btf_value_type_id = def->value_type_id;
2810
2811
/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
2812
if (map_is_ringbuf(map))
2813
map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2814
2815
if (def->parts & MAP_DEF_MAP_TYPE)
2816
pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2817
2818
if (def->parts & MAP_DEF_KEY_TYPE)
2819
pr_debug("map '%s': found key [%u], sz = %u.\n",
2820
map->name, def->key_type_id, def->key_size);
2821
else if (def->parts & MAP_DEF_KEY_SIZE)
2822
pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2823
2824
if (def->parts & MAP_DEF_VALUE_TYPE)
2825
pr_debug("map '%s': found value [%u], sz = %u.\n",
2826
map->name, def->value_type_id, def->value_size);
2827
else if (def->parts & MAP_DEF_VALUE_SIZE)
2828
pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2829
2830
if (def->parts & MAP_DEF_MAX_ENTRIES)
2831
pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2832
if (def->parts & MAP_DEF_MAP_FLAGS)
2833
pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2834
if (def->parts & MAP_DEF_MAP_EXTRA)
2835
pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2836
(unsigned long long)def->map_extra);
2837
if (def->parts & MAP_DEF_PINNING)
2838
pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2839
if (def->parts & MAP_DEF_NUMA_NODE)
2840
pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2841
2842
if (def->parts & MAP_DEF_INNER_MAP)
2843
pr_debug("map '%s': found inner map definition.\n", map->name);
2844
}
2845
2846
static const char *btf_var_linkage_str(__u32 linkage)
2847
{
2848
switch (linkage) {
2849
case BTF_VAR_STATIC: return "static";
2850
case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2851
case BTF_VAR_GLOBAL_EXTERN: return "extern";
2852
default: return "unknown";
2853
}
2854
}
2855
2856
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2857
const struct btf_type *sec,
2858
int var_idx, int sec_idx,
2859
const Elf_Data *data, bool strict,
2860
const char *pin_root_path)
2861
{
2862
struct btf_map_def map_def = {}, inner_def = {};
2863
const struct btf_type *var, *def;
2864
const struct btf_var_secinfo *vi;
2865
const struct btf_var *var_extra;
2866
const char *map_name;
2867
struct bpf_map *map;
2868
int err;
2869
2870
vi = btf_var_secinfos(sec) + var_idx;
2871
var = btf__type_by_id(obj->btf, vi->type);
2872
var_extra = btf_var(var);
2873
map_name = btf__name_by_offset(obj->btf, var->name_off);
2874
2875
if (map_name == NULL || map_name[0] == '\0') {
2876
pr_warn("map #%d: empty name.\n", var_idx);
2877
return -EINVAL;
2878
}
2879
if ((__u64)vi->offset + vi->size > data->d_size) {
2880
pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2881
return -EINVAL;
2882
}
2883
if (!btf_is_var(var)) {
2884
pr_warn("map '%s': unexpected var kind %s.\n",
2885
map_name, btf_kind_str(var));
2886
return -EINVAL;
2887
}
2888
if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2889
pr_warn("map '%s': unsupported map linkage %s.\n",
2890
map_name, btf_var_linkage_str(var_extra->linkage));
2891
return -EOPNOTSUPP;
2892
}
2893
2894
def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2895
if (!btf_is_struct(def)) {
2896
pr_warn("map '%s': unexpected def kind %s.\n",
2897
map_name, btf_kind_str(def));
2898
return -EINVAL;
2899
}
2900
if (def->size > vi->size) {
2901
pr_warn("map '%s': invalid def size.\n", map_name);
2902
return -EINVAL;
2903
}
2904
2905
map = bpf_object__add_map(obj);
2906
if (IS_ERR(map))
2907
return PTR_ERR(map);
2908
map->name = strdup(map_name);
2909
if (!map->name) {
2910
pr_warn("map '%s': failed to alloc map name.\n", map_name);
2911
return -ENOMEM;
2912
}
2913
map->libbpf_type = LIBBPF_MAP_UNSPEC;
2914
map->def.type = BPF_MAP_TYPE_UNSPEC;
2915
map->sec_idx = sec_idx;
2916
map->sec_offset = vi->offset;
2917
map->btf_var_idx = var_idx;
2918
pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2919
map_name, map->sec_idx, map->sec_offset);
2920
2921
err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2922
if (err)
2923
return err;
2924
2925
fill_map_from_def(map, &map_def);
2926
2927
if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2928
err = build_map_pin_path(map, pin_root_path);
2929
if (err) {
2930
pr_warn("map '%s': couldn't build pin path.\n", map->name);
2931
return err;
2932
}
2933
}
2934
2935
if (map_def.parts & MAP_DEF_INNER_MAP) {
2936
map->inner_map = calloc(1, sizeof(*map->inner_map));
2937
if (!map->inner_map)
2938
return -ENOMEM;
2939
map->inner_map->fd = create_placeholder_fd();
2940
if (map->inner_map->fd < 0)
2941
return map->inner_map->fd;
2942
map->inner_map->sec_idx = sec_idx;
2943
map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2944
if (!map->inner_map->name)
2945
return -ENOMEM;
2946
sprintf(map->inner_map->name, "%s.inner", map_name);
2947
2948
fill_map_from_def(map->inner_map, &inner_def);
2949
}
2950
2951
err = map_fill_btf_type_info(obj, map);
2952
if (err)
2953
return err;
2954
2955
return 0;
2956
}
2957
2958
static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
2959
const char *sec_name, int sec_idx,
2960
void *data, size_t data_sz)
2961
{
2962
const long page_sz = sysconf(_SC_PAGE_SIZE);
2963
size_t mmap_sz;
2964
2965
mmap_sz = bpf_map_mmap_sz(map);
2966
if (roundup(data_sz, page_sz) > mmap_sz) {
2967
pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
2968
sec_name, mmap_sz, data_sz);
2969
return -E2BIG;
2970
}
2971
2972
obj->arena_data = malloc(data_sz);
2973
if (!obj->arena_data)
2974
return -ENOMEM;
2975
memcpy(obj->arena_data, data, data_sz);
2976
obj->arena_data_sz = data_sz;
2977
2978
/* make bpf_map__init_value() work for ARENA maps */
2979
map->mmaped = obj->arena_data;
2980
2981
return 0;
2982
}
2983
2984
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2985
const char *pin_root_path)
2986
{
2987
const struct btf_type *sec = NULL;
2988
int nr_types, i, vlen, err;
2989
const struct btf_type *t;
2990
const char *name;
2991
Elf_Data *data;
2992
Elf_Scn *scn;
2993
2994
if (obj->efile.btf_maps_shndx < 0)
2995
return 0;
2996
2997
scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2998
data = elf_sec_data(obj, scn);
2999
if (!scn || !data) {
3000
pr_warn("elf: failed to get %s map definitions for %s\n",
3001
MAPS_ELF_SEC, obj->path);
3002
return -EINVAL;
3003
}
3004
3005
nr_types = btf__type_cnt(obj->btf);
3006
for (i = 1; i < nr_types; i++) {
3007
t = btf__type_by_id(obj->btf, i);
3008
if (!btf_is_datasec(t))
3009
continue;
3010
name = btf__name_by_offset(obj->btf, t->name_off);
3011
if (strcmp(name, MAPS_ELF_SEC) == 0) {
3012
sec = t;
3013
obj->efile.btf_maps_sec_btf_id = i;
3014
break;
3015
}
3016
}
3017
3018
if (!sec) {
3019
pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
3020
return -ENOENT;
3021
}
3022
3023
vlen = btf_vlen(sec);
3024
for (i = 0; i < vlen; i++) {
3025
err = bpf_object__init_user_btf_map(obj, sec, i,
3026
obj->efile.btf_maps_shndx,
3027
data, strict,
3028
pin_root_path);
3029
if (err)
3030
return err;
3031
}
3032
3033
for (i = 0; i < obj->nr_maps; i++) {
3034
struct bpf_map *map = &obj->maps[i];
3035
3036
if (map->def.type != BPF_MAP_TYPE_ARENA)
3037
continue;
3038
3039
if (obj->arena_map_idx >= 0) {
3040
pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n",
3041
map->name, obj->maps[obj->arena_map_idx].name);
3042
return -EINVAL;
3043
}
3044
obj->arena_map_idx = i;
3045
3046
if (obj->efile.arena_data) {
3047
err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx,
3048
obj->efile.arena_data->d_buf,
3049
obj->efile.arena_data->d_size);
3050
if (err)
3051
return err;
3052
}
3053
}
3054
if (obj->efile.arena_data && obj->arena_map_idx < 0) {
3055
pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",
3056
ARENA_SEC);
3057
return -ENOENT;
3058
}
3059
3060
return 0;
3061
}
3062
3063
static int bpf_object__init_maps(struct bpf_object *obj,
3064
const struct bpf_object_open_opts *opts)
3065
{
3066
const char *pin_root_path;
3067
bool strict;
3068
int err = 0;
3069
3070
strict = !OPTS_GET(opts, relaxed_maps, false);
3071
pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
3072
3073
err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
3074
err = err ?: bpf_object__init_global_data_maps(obj);
3075
err = err ?: bpf_object__init_kconfig_map(obj);
3076
err = err ?: bpf_object_init_struct_ops(obj);
3077
3078
return err;
3079
}
3080
3081
static bool section_have_execinstr(struct bpf_object *obj, int idx)
3082
{
3083
Elf64_Shdr *sh;
3084
3085
sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
3086
if (!sh)
3087
return false;
3088
3089
return sh->sh_flags & SHF_EXECINSTR;
3090
}
3091
3092
static bool starts_with_qmark(const char *s)
3093
{
3094
return s && s[0] == '?';
3095
}
3096
3097
static bool btf_needs_sanitization(struct bpf_object *obj)
3098
{
3099
bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
3100
bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
3101
bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
3102
bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
3103
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
3104
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
3105
bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
3106
bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
3107
3108
return !has_func || !has_datasec || !has_func_global || !has_float ||
3109
!has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
3110
}
3111
3112
static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
3113
{
3114
bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
3115
bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
3116
bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
3117
bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
3118
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
3119
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
3120
bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
3121
bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
3122
int enum64_placeholder_id = 0;
3123
struct btf_type *t;
3124
int i, j, vlen;
3125
3126
for (i = 1; i < btf__type_cnt(btf); i++) {
3127
t = (struct btf_type *)btf__type_by_id(btf, i);
3128
3129
if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
3130
/* replace VAR/DECL_TAG with INT */
3131
t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
3132
/*
3133
* using size = 1 is the safest choice, 4 will be too
3134
* big and cause kernel BTF validation failure if
3135
* original variable took less than 4 bytes
3136
*/
3137
t->size = 1;
3138
*(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
3139
} else if (!has_datasec && btf_is_datasec(t)) {
3140
/* replace DATASEC with STRUCT */
3141
const struct btf_var_secinfo *v = btf_var_secinfos(t);
3142
struct btf_member *m = btf_members(t);
3143
struct btf_type *vt;
3144
char *name;
3145
3146
name = (char *)btf__name_by_offset(btf, t->name_off);
3147
while (*name) {
3148
if (*name == '.' || *name == '?')
3149
*name = '_';
3150
name++;
3151
}
3152
3153
vlen = btf_vlen(t);
3154
t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
3155
for (j = 0; j < vlen; j++, v++, m++) {
3156
/* order of field assignments is important */
3157
m->offset = v->offset * 8;
3158
m->type = v->type;
3159
/* preserve variable name as member name */
3160
vt = (void *)btf__type_by_id(btf, v->type);
3161
m->name_off = vt->name_off;
3162
}
3163
} else if (!has_qmark_datasec && btf_is_datasec(t) &&
3164
starts_with_qmark(btf__name_by_offset(btf, t->name_off))) {
3165
/* replace '?' prefix with '_' for DATASEC names */
3166
char *name;
3167
3168
name = (char *)btf__name_by_offset(btf, t->name_off);
3169
if (name[0] == '?')
3170
name[0] = '_';
3171
} else if (!has_func && btf_is_func_proto(t)) {
3172
/* replace FUNC_PROTO with ENUM */
3173
vlen = btf_vlen(t);
3174
t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
3175
t->size = sizeof(__u32); /* kernel enforced */
3176
} else if (!has_func && btf_is_func(t)) {
3177
/* replace FUNC with TYPEDEF */
3178
t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
3179
} else if (!has_func_global && btf_is_func(t)) {
3180
/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
3181
t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
3182
} else if (!has_float && btf_is_float(t)) {
3183
/* replace FLOAT with an equally-sized empty STRUCT;
3184
* since C compilers do not accept e.g. "float" as a
3185
* valid struct name, make it anonymous
3186
*/
3187
t->name_off = 0;
3188
t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
3189
} else if (!has_type_tag && btf_is_type_tag(t)) {
3190
/* replace TYPE_TAG with a CONST */
3191
t->name_off = 0;
3192
t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
3193
} else if (!has_enum64 && btf_is_enum(t)) {
3194
/* clear the kflag */
3195
t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
3196
} else if (!has_enum64 && btf_is_enum64(t)) {
3197
/* replace ENUM64 with a union */
3198
struct btf_member *m;
3199
3200
if (enum64_placeholder_id == 0) {
3201
enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
3202
if (enum64_placeholder_id < 0)
3203
return enum64_placeholder_id;
3204
3205
t = (struct btf_type *)btf__type_by_id(btf, i);
3206
}
3207
3208
m = btf_members(t);
3209
vlen = btf_vlen(t);
3210
t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
3211
for (j = 0; j < vlen; j++, m++) {
3212
m->type = enum64_placeholder_id;
3213
m->offset = 0;
3214
}
3215
}
3216
}
3217
3218
return 0;
3219
}
3220
3221
static bool libbpf_needs_btf(const struct bpf_object *obj)
3222
{
3223
return obj->efile.btf_maps_shndx >= 0 ||
3224
obj->efile.has_st_ops ||
3225
obj->nr_extern > 0;
3226
}
3227
3228
static bool kernel_needs_btf(const struct bpf_object *obj)
3229
{
3230
return obj->efile.has_st_ops;
3231
}
3232
3233
static int bpf_object__init_btf(struct bpf_object *obj,
3234
Elf_Data *btf_data,
3235
Elf_Data *btf_ext_data)
3236
{
3237
int err = -ENOENT;
3238
3239
if (btf_data) {
3240
obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
3241
err = libbpf_get_error(obj->btf);
3242
if (err) {
3243
obj->btf = NULL;
3244
pr_warn("Error loading ELF section %s: %s.\n", BTF_ELF_SEC, errstr(err));
3245
goto out;
3246
}
3247
/* enforce 8-byte pointers for BPF-targeted BTFs */
3248
btf__set_pointer_size(obj->btf, 8);
3249
}
3250
if (btf_ext_data) {
3251
struct btf_ext_info *ext_segs[3];
3252
int seg_num, sec_num;
3253
3254
if (!obj->btf) {
3255
pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
3256
BTF_EXT_ELF_SEC, BTF_ELF_SEC);
3257
goto out;
3258
}
3259
obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
3260
err = libbpf_get_error(obj->btf_ext);
3261
if (err) {
3262
pr_warn("Error loading ELF section %s: %s. Ignored and continue.\n",
3263
BTF_EXT_ELF_SEC, errstr(err));
3264
obj->btf_ext = NULL;
3265
goto out;
3266
}
3267
3268
/* setup .BTF.ext to ELF section mapping */
3269
ext_segs[0] = &obj->btf_ext->func_info;
3270
ext_segs[1] = &obj->btf_ext->line_info;
3271
ext_segs[2] = &obj->btf_ext->core_relo_info;
3272
for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
3273
struct btf_ext_info *seg = ext_segs[seg_num];
3274
const struct btf_ext_info_sec *sec;
3275
const char *sec_name;
3276
Elf_Scn *scn;
3277
3278
if (seg->sec_cnt == 0)
3279
continue;
3280
3281
seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
3282
if (!seg->sec_idxs) {
3283
err = -ENOMEM;
3284
goto out;
3285
}
3286
3287
sec_num = 0;
3288
for_each_btf_ext_sec(seg, sec) {
3289
/* preventively increment index to avoid doing
3290
* this before every continue below
3291
*/
3292
sec_num++;
3293
3294
sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
3295
if (str_is_empty(sec_name))
3296
continue;
3297
scn = elf_sec_by_name(obj, sec_name);
3298
if (!scn)
3299
continue;
3300
3301
seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
3302
}
3303
}
3304
}
3305
out:
3306
if (err && libbpf_needs_btf(obj)) {
3307
pr_warn("BTF is required, but is missing or corrupted.\n");
3308
return err;
3309
}
3310
return 0;
3311
}
3312
3313
static int compare_vsi_off(const void *_a, const void *_b)
3314
{
3315
const struct btf_var_secinfo *a = _a;
3316
const struct btf_var_secinfo *b = _b;
3317
3318
return a->offset - b->offset;
3319
}
3320
3321
static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
3322
struct btf_type *t)
3323
{
3324
__u32 size = 0, i, vars = btf_vlen(t);
3325
const char *sec_name = btf__name_by_offset(btf, t->name_off);
3326
struct btf_var_secinfo *vsi;
3327
bool fixup_offsets = false;
3328
int err;
3329
3330
if (!sec_name) {
3331
pr_debug("No name found in string section for DATASEC kind.\n");
3332
return -ENOENT;
3333
}
3334
3335
/* Extern-backing datasecs (.ksyms, .kconfig) have their size and
3336
* variable offsets set at the previous step. Further, not every
3337
* extern BTF VAR has corresponding ELF symbol preserved, so we skip
3338
* all fixups altogether for such sections and go straight to sorting
3339
* VARs within their DATASEC.
3340
*/
3341
if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
3342
goto sort_vars;
3343
3344
/* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
3345
* fix this up. But BPF static linker already fixes this up and fills
3346
* all the sizes and offsets during static linking. So this step has
3347
* to be optional. But the STV_HIDDEN handling is non-optional for any
3348
* non-extern DATASEC, so the variable fixup loop below handles both
3349
* functions at the same time, paying the cost of BTF VAR <-> ELF
3350
* symbol matching just once.
3351
*/
3352
if (t->size == 0) {
3353
err = find_elf_sec_sz(obj, sec_name, &size);
3354
if (err || !size) {
3355
pr_debug("sec '%s': failed to determine size from ELF: size %u, err %s\n",
3356
sec_name, size, errstr(err));
3357
return -ENOENT;
3358
}
3359
3360
t->size = size;
3361
fixup_offsets = true;
3362
}
3363
3364
for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
3365
const struct btf_type *t_var;
3366
struct btf_var *var;
3367
const char *var_name;
3368
Elf64_Sym *sym;
3369
3370
t_var = btf__type_by_id(btf, vsi->type);
3371
if (!t_var || !btf_is_var(t_var)) {
3372
pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
3373
return -EINVAL;
3374
}
3375
3376
var = btf_var(t_var);
3377
if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
3378
continue;
3379
3380
var_name = btf__name_by_offset(btf, t_var->name_off);
3381
if (!var_name) {
3382
pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
3383
sec_name, i);
3384
return -ENOENT;
3385
}
3386
3387
sym = find_elf_var_sym(obj, var_name);
3388
if (IS_ERR(sym)) {
3389
pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
3390
sec_name, var_name);
3391
return -ENOENT;
3392
}
3393
3394
if (fixup_offsets)
3395
vsi->offset = sym->st_value;
3396
3397
/* if variable is a global/weak symbol, but has restricted
3398
* (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
3399
* as static. This follows similar logic for functions (BPF
3400
* subprogs) and influences libbpf's further decisions about
3401
* whether to make global data BPF array maps as
3402
* BPF_F_MMAPABLE.
3403
*/
3404
if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
3405
|| ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
3406
var->linkage = BTF_VAR_STATIC;
3407
}
3408
3409
sort_vars:
3410
qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
3411
return 0;
3412
}
3413
3414
static int bpf_object_fixup_btf(struct bpf_object *obj)
3415
{
3416
int i, n, err = 0;
3417
3418
if (!obj->btf)
3419
return 0;
3420
3421
n = btf__type_cnt(obj->btf);
3422
for (i = 1; i < n; i++) {
3423
struct btf_type *t = btf_type_by_id(obj->btf, i);
3424
3425
/* Loader needs to fix up some of the things compiler
3426
* couldn't get its hands on while emitting BTF. This
3427
* is section size and global variable offset. We use
3428
* the info from the ELF itself for this purpose.
3429
*/
3430
if (btf_is_datasec(t)) {
3431
err = btf_fixup_datasec(obj, obj->btf, t);
3432
if (err)
3433
return err;
3434
}
3435
}
3436
3437
return 0;
3438
}
3439
3440
static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
3441
{
3442
if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
3443
prog->type == BPF_PROG_TYPE_LSM)
3444
return true;
3445
3446
/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
3447
* also need vmlinux BTF
3448
*/
3449
if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
3450
return true;
3451
3452
return false;
3453
}
3454
3455
static bool map_needs_vmlinux_btf(struct bpf_map *map)
3456
{
3457
return bpf_map__is_struct_ops(map);
3458
}
3459
3460
static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
3461
{
3462
struct bpf_program *prog;
3463
struct bpf_map *map;
3464
int i;
3465
3466
/* CO-RE relocations need kernel BTF, only when btf_custom_path
3467
* is not specified
3468
*/
3469
if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
3470
return true;
3471
3472
/* Support for typed ksyms needs kernel BTF */
3473
for (i = 0; i < obj->nr_extern; i++) {
3474
const struct extern_desc *ext;
3475
3476
ext = &obj->externs[i];
3477
if (ext->type == EXT_KSYM && ext->ksym.type_id)
3478
return true;
3479
}
3480
3481
bpf_object__for_each_program(prog, obj) {
3482
if (!prog->autoload)
3483
continue;
3484
if (prog_needs_vmlinux_btf(prog))
3485
return true;
3486
}
3487
3488
bpf_object__for_each_map(map, obj) {
3489
if (map_needs_vmlinux_btf(map))
3490
return true;
3491
}
3492
3493
return false;
3494
}
3495
3496
static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
3497
{
3498
int err;
3499
3500
/* btf_vmlinux could be loaded earlier */
3501
if (obj->btf_vmlinux || obj->gen_loader)
3502
return 0;
3503
3504
if (!force && !obj_needs_vmlinux_btf(obj))
3505
return 0;
3506
3507
obj->btf_vmlinux = btf__load_vmlinux_btf();
3508
err = libbpf_get_error(obj->btf_vmlinux);
3509
if (err) {
3510
pr_warn("Error loading vmlinux BTF: %s\n", errstr(err));
3511
obj->btf_vmlinux = NULL;
3512
return err;
3513
}
3514
return 0;
3515
}
3516
3517
static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
3518
{
3519
struct btf *kern_btf = obj->btf;
3520
bool btf_mandatory, sanitize;
3521
int i, err = 0;
3522
3523
if (!obj->btf)
3524
return 0;
3525
3526
if (!kernel_supports(obj, FEAT_BTF)) {
3527
if (kernel_needs_btf(obj)) {
3528
err = -EOPNOTSUPP;
3529
goto report;
3530
}
3531
pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
3532
return 0;
3533
}
3534
3535
/* Even though some subprogs are global/weak, user might prefer more
3536
* permissive BPF verification process that BPF verifier performs for
3537
* static functions, taking into account more context from the caller
3538
* functions. In such case, they need to mark such subprogs with
3539
* __attribute__((visibility("hidden"))) and libbpf will adjust
3540
* corresponding FUNC BTF type to be marked as static and trigger more
3541
* involved BPF verification process.
3542
*/
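/* E.g. (user-side BPF C, illustrative):
*
*	__hidden int sum(int a, int b) { return a + b; }
*
* stays a global subprogram in ELF terms, but its FUNC BTF linkage is
* switched from BTF_FUNC_GLOBAL to BTF_FUNC_STATIC by the loop below.
*/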
3543
for (i = 0; i < obj->nr_programs; i++) {
3544
struct bpf_program *prog = &obj->programs[i];
3545
struct btf_type *t;
3546
const char *name;
3547
int j, n;
3548
3549
if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
3550
continue;
3551
3552
n = btf__type_cnt(obj->btf);
3553
for (j = 1; j < n; j++) {
3554
t = btf_type_by_id(obj->btf, j);
3555
if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
3556
continue;
3557
3558
name = btf__str_by_offset(obj->btf, t->name_off);
3559
if (strcmp(name, prog->name) != 0)
3560
continue;
3561
3562
t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
3563
break;
3564
}
3565
}
3566
3567
sanitize = btf_needs_sanitization(obj);
3568
if (sanitize) {
3569
const void *raw_data;
3570
__u32 sz;
3571
3572
/* clone BTF to sanitize a copy and leave the original intact */
3573
raw_data = btf__raw_data(obj->btf, &sz);
3574
kern_btf = btf__new(raw_data, sz);
3575
err = libbpf_get_error(kern_btf);
3576
if (err)
3577
return err;
3578
3579
/* enforce 8-byte pointers for BPF-targeted BTFs */
3580
btf__set_pointer_size(obj->btf, 8);
3581
err = bpf_object__sanitize_btf(obj, kern_btf);
3582
if (err)
3583
return err;
3584
}
3585
3586
if (obj->gen_loader) {
3587
__u32 raw_size = 0;
3588
const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3589
3590
if (!raw_data)
3591
return -ENOMEM;
3592
bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3593
/* Pretend to have valid FD to pass various fd >= 0 checks.
3594
* This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3595
*/
3596
btf__set_fd(kern_btf, 0);
3597
} else {
3598
/* currently BPF_BTF_LOAD only supports log_level 1 */
3599
err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3600
obj->log_level ? 1 : 0, obj->token_fd);
3601
}
3602
if (sanitize) {
3603
if (!err) {
3604
/* move fd to libbpf's BTF */
3605
btf__set_fd(obj->btf, btf__fd(kern_btf));
3606
btf__set_fd(kern_btf, -1);
3607
}
3608
btf__free(kern_btf);
3609
}
3610
report:
3611
if (err) {
3612
btf_mandatory = kernel_needs_btf(obj);
3613
if (btf_mandatory) {
3614
pr_warn("Error loading .BTF into kernel: %s. BTF is mandatory, can't proceed.\n",
3615
errstr(err));
3616
} else {
3617
pr_info("Error loading .BTF into kernel: %s. BTF is optional, ignoring.\n",
3618
errstr(err));
3619
err = 0;
3620
}
3621
}
3622
return err;
3623
}
3624
3625
static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3626
{
3627
const char *name;
3628
3629
name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3630
if (!name) {
3631
pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3632
off, obj->path, elf_errmsg(-1));
3633
return NULL;
3634
}
3635
3636
return name;
3637
}
3638
3639
static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3640
{
3641
const char *name;
3642
3643
name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3644
if (!name) {
3645
pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3646
off, obj->path, elf_errmsg(-1));
3647
return NULL;
3648
}
3649
3650
return name;
3651
}
3652
3653
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3654
{
3655
Elf_Scn *scn;
3656
3657
scn = elf_getscn(obj->efile.elf, idx);
3658
if (!scn) {
3659
pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3660
idx, obj->path, elf_errmsg(-1));
3661
return NULL;
3662
}
3663
return scn;
3664
}
3665
3666
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3667
{
3668
Elf_Scn *scn = NULL;
3669
Elf *elf = obj->efile.elf;
3670
const char *sec_name;
3671
3672
while ((scn = elf_nextscn(elf, scn)) != NULL) {
3673
sec_name = elf_sec_name(obj, scn);
3674
if (!sec_name)
3675
return NULL;
3676
3677
if (strcmp(sec_name, name) != 0)
3678
continue;
3679
3680
return scn;
3681
}
3682
return NULL;
3683
}
3684
3685
static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3686
{
3687
Elf64_Shdr *shdr;
3688
3689
if (!scn)
3690
return NULL;
3691
3692
shdr = elf64_getshdr(scn);
3693
if (!shdr) {
3694
pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3695
elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3696
return NULL;
3697
}
3698
3699
return shdr;
3700
}
3701
3702
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3703
{
3704
const char *name;
3705
Elf64_Shdr *sh;
3706
3707
if (!scn)
3708
return NULL;
3709
3710
sh = elf_sec_hdr(obj, scn);
3711
if (!sh)
3712
return NULL;
3713
3714
name = elf_sec_str(obj, sh->sh_name);
3715
if (!name) {
3716
pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3717
elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3718
return NULL;
3719
}
3720
3721
return name;
3722
}
3723
3724
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3725
{
3726
Elf_Data *data;
3727
3728
if (!scn)
3729
return NULL;
3730
3731
data = elf_getdata(scn, 0);
3732
if (!data) {
3733
pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3734
elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3735
obj->path, elf_errmsg(-1));
3736
return NULL;
3737
}
3738
3739
return data;
3740
}
3741
3742
static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3743
{
3744
if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3745
return NULL;
3746
3747
return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3748
}
3749
3750
static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3751
{
3752
if (idx >= data->d_size / sizeof(Elf64_Rel))
3753
return NULL;
3754
3755
return (Elf64_Rel *)data->d_buf + idx;
3756
}
3757
3758
static bool is_sec_name_dwarf(const char *name)
3759
{
3760
/* approximation, but the actual list is too long */
3761
return str_has_pfx(name, ".debug_");
3762
}
3763
3764
static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3765
{
3766
/* no special handling of .strtab */
3767
if (hdr->sh_type == SHT_STRTAB)
3768
return true;
3769
3770
/* ignore .llvm_addrsig section as well */
3771
if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3772
return true;
3773
3774
/* no subprograms will lead to an empty .text section, ignore it */
3775
if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3776
strcmp(name, ".text") == 0)
3777
return true;
3778
3779
/* DWARF sections */
3780
if (is_sec_name_dwarf(name))
3781
return true;
3782
3783
if (str_has_pfx(name, ".rel")) {
3784
name += sizeof(".rel") - 1;
3785
/* DWARF section relocations */
3786
if (is_sec_name_dwarf(name))
3787
return true;
3788
3789
/* .BTF and .BTF.ext don't need relocations */
3790
if (strcmp(name, BTF_ELF_SEC) == 0 ||
3791
strcmp(name, BTF_EXT_ELF_SEC) == 0)
3792
return true;
3793
}
3794
3795
return false;
3796
}
3797
3798
static int cmp_progs(const void *_a, const void *_b)
3799
{
3800
const struct bpf_program *a = _a;
3801
const struct bpf_program *b = _b;
3802
3803
if (a->sec_idx != b->sec_idx)
3804
return a->sec_idx < b->sec_idx ? -1 : 1;
3805
3806
/* sec_insn_off can't be the same within the section */
3807
return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3808
}
3809
3810
static int bpf_object__elf_collect(struct bpf_object *obj)
3811
{
3812
struct elf_sec_desc *sec_desc;
3813
Elf *elf = obj->efile.elf;
3814
Elf_Data *btf_ext_data = NULL;
3815
Elf_Data *btf_data = NULL;
3816
int idx = 0, err = 0;
3817
const char *name;
3818
Elf_Data *data;
3819
Elf_Scn *scn;
3820
Elf64_Shdr *sh;
3821
3822
/* ELF section indices are 0-based, but sec #0 is special "invalid"
3823
* section. Since section count retrieved by elf_getshdrnum() does
3824
* include sec #0, it is already the necessary size of an array to keep
3825
* all the sections.
3826
*/
3827
if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
3828
pr_warn("elf: failed to get the number of sections for %s: %s\n",
3829
obj->path, elf_errmsg(-1));
3830
return -LIBBPF_ERRNO__FORMAT;
3831
}
3832
obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3833
if (!obj->efile.secs)
3834
return -ENOMEM;
3835
3836
/* a bunch of ELF parsing functionality depends on processing symbols,
3837
* so do the first pass and find the symbol table
3838
*/
3839
scn = NULL;
3840
while ((scn = elf_nextscn(elf, scn)) != NULL) {
3841
sh = elf_sec_hdr(obj, scn);
3842
if (!sh)
3843
return -LIBBPF_ERRNO__FORMAT;
3844
3845
if (sh->sh_type == SHT_SYMTAB) {
3846
if (obj->efile.symbols) {
3847
pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3848
return -LIBBPF_ERRNO__FORMAT;
3849
}
3850
3851
data = elf_sec_data(obj, scn);
3852
if (!data)
3853
return -LIBBPF_ERRNO__FORMAT;
3854
3855
idx = elf_ndxscn(scn);
3856
3857
obj->efile.symbols = data;
3858
obj->efile.symbols_shndx = idx;
3859
obj->efile.strtabidx = sh->sh_link;
3860
}
3861
}
3862
3863
if (!obj->efile.symbols) {
3864
pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3865
obj->path);
3866
return -ENOENT;
3867
}
3868
3869
scn = NULL;
3870
while ((scn = elf_nextscn(elf, scn)) != NULL) {
3871
idx = elf_ndxscn(scn);
3872
sec_desc = &obj->efile.secs[idx];
3873
3874
sh = elf_sec_hdr(obj, scn);
3875
if (!sh)
3876
return -LIBBPF_ERRNO__FORMAT;
3877
3878
name = elf_sec_str(obj, sh->sh_name);
3879
if (!name)
3880
return -LIBBPF_ERRNO__FORMAT;
3881
3882
if (ignore_elf_section(sh, name))
3883
continue;
3884
3885
data = elf_sec_data(obj, scn);
3886
if (!data)
3887
return -LIBBPF_ERRNO__FORMAT;
3888
3889
pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3890
idx, name, (unsigned long)data->d_size,
3891
(int)sh->sh_link, (unsigned long)sh->sh_flags,
3892
(int)sh->sh_type);
3893
3894
if (strcmp(name, "license") == 0) {
3895
err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3896
if (err)
3897
return err;
3898
} else if (strcmp(name, "version") == 0) {
3899
err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3900
if (err)
3901
return err;
3902
} else if (strcmp(name, "maps") == 0) {
3903
pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
3904
return -ENOTSUP;
3905
} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3906
obj->efile.btf_maps_shndx = idx;
3907
} else if (strcmp(name, BTF_ELF_SEC) == 0) {
3908
if (sh->sh_type != SHT_PROGBITS)
3909
return -LIBBPF_ERRNO__FORMAT;
3910
btf_data = data;
3911
} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3912
if (sh->sh_type != SHT_PROGBITS)
3913
return -LIBBPF_ERRNO__FORMAT;
3914
btf_ext_data = data;
3915
} else if (sh->sh_type == SHT_SYMTAB) {
3916
/* already processed during the first pass above */
3917
} else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3918
if (sh->sh_flags & SHF_EXECINSTR) {
3919
if (strcmp(name, ".text") == 0)
3920
obj->efile.text_shndx = idx;
3921
err = bpf_object__add_programs(obj, data, name, idx);
3922
if (err)
3923
return err;
3924
} else if (strcmp(name, DATA_SEC) == 0 ||
3925
str_has_pfx(name, DATA_SEC ".")) {
3926
sec_desc->sec_type = SEC_DATA;
3927
sec_desc->shdr = sh;
3928
sec_desc->data = data;
3929
} else if (strcmp(name, RODATA_SEC) == 0 ||
3930
str_has_pfx(name, RODATA_SEC ".")) {
3931
sec_desc->sec_type = SEC_RODATA;
3932
sec_desc->shdr = sh;
3933
sec_desc->data = data;
3934
} else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
3935
strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
3936
strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
3937
strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) {
3938
sec_desc->sec_type = SEC_ST_OPS;
3939
sec_desc->shdr = sh;
3940
sec_desc->data = data;
3941
obj->efile.has_st_ops = true;
3942
} else if (strcmp(name, ARENA_SEC) == 0) {
3943
obj->efile.arena_data = data;
3944
obj->efile.arena_data_shndx = idx;
3945
} else {
3946
pr_info("elf: skipping unrecognized data section(%d) %s\n",
3947
idx, name);
3948
}
3949
} else if (sh->sh_type == SHT_REL) {
3950
int targ_sec_idx = sh->sh_info; /* points to other section */
3951
3952
if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3953
targ_sec_idx >= obj->efile.sec_cnt)
3954
return -LIBBPF_ERRNO__FORMAT;
3955
3956
/* Only do relo for section with exec instructions */
3957
if (!section_have_execinstr(obj, targ_sec_idx) &&
3958
strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3959
strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
3960
strcmp(name, ".rel?" STRUCT_OPS_SEC) &&
3961
strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) &&
3962
strcmp(name, ".rel" MAPS_ELF_SEC)) {
3963
pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3964
idx, name, targ_sec_idx,
3965
elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
3966
continue;
3967
}
3968
3969
sec_desc->sec_type = SEC_RELO;
3970
sec_desc->shdr = sh;
3971
sec_desc->data = data;
3972
} else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
3973
str_has_pfx(name, BSS_SEC "."))) {
3974
sec_desc->sec_type = SEC_BSS;
3975
sec_desc->shdr = sh;
3976
sec_desc->data = data;
3977
} else {
3978
pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3979
(size_t)sh->sh_size);
3980
}
3981
}
3982
3983
if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3984
pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3985
return -LIBBPF_ERRNO__FORMAT;
3986
}
3987
3988
/* change BPF program insns to native endianness for introspection */
3989
if (!is_native_endianness(obj))
3990
bpf_object_bswap_progs(obj);
3991
3992
/* sort BPF programs by section index and in-section instruction offset
3993
* for faster search
3994
*/
3995
if (obj->nr_programs)
3996
qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3997
3998
return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3999
}
4000
4001
static bool sym_is_extern(const Elf64_Sym *sym)
4002
{
4003
int bind = ELF64_ST_BIND(sym->st_info);
4004
/* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
4005
return sym->st_shndx == SHN_UNDEF &&
4006
(bind == STB_GLOBAL || bind == STB_WEAK) &&
4007
ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
4008
}
4009
4010
static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
4011
{
4012
int bind = ELF64_ST_BIND(sym->st_info);
4013
int type = ELF64_ST_TYPE(sym->st_info);
4014
4015
/* in .text section */
4016
if (sym->st_shndx != text_shndx)
4017
return false;
4018
4019
/* local function */
4020
if (bind == STB_LOCAL && type == STT_SECTION)
4021
return true;
4022
4023
/* global function */
4024
return (bind == STB_GLOBAL || bind == STB_WEAK) && type == STT_FUNC;
4025
}
4026
4027
static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
4028
{
4029
const struct btf_type *t;
4030
const char *tname;
4031
int i, n;
4032
4033
if (!btf)
4034
return -ESRCH;
4035
4036
n = btf__type_cnt(btf);
4037
for (i = 1; i < n; i++) {
4038
t = btf__type_by_id(btf, i);
4039
4040
if (!btf_is_var(t) && !btf_is_func(t))
4041
continue;
4042
4043
tname = btf__name_by_offset(btf, t->name_off);
4044
if (strcmp(tname, ext_name))
4045
continue;
4046
4047
if (btf_is_var(t) &&
4048
btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
4049
return -EINVAL;
4050
4051
if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
4052
return -EINVAL;
4053
4054
return i;
4055
}
4056
4057
return -ENOENT;
4058
}
4059
4060
static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
4061
const struct btf_var_secinfo *vs;
4062
const struct btf_type *t;
4063
int i, j, n;
4064
4065
if (!btf)
4066
return -ESRCH;
4067
4068
n = btf__type_cnt(btf);
4069
for (i = 1; i < n; i++) {
4070
t = btf__type_by_id(btf, i);
4071
4072
if (!btf_is_datasec(t))
4073
continue;
4074
4075
vs = btf_var_secinfos(t);
4076
for (j = 0; j < btf_vlen(t); j++, vs++) {
4077
if (vs->type == ext_btf_id)
4078
return i;
4079
}
4080
}
4081
4082
return -ENOENT;
4083
}
4084
4085
static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
4086
bool *is_signed)
4087
{
4088
const struct btf_type *t;
4089
const char *name;
4090
4091
t = skip_mods_and_typedefs(btf, id, NULL);
4092
name = btf__name_by_offset(btf, t->name_off);
4093
4094
if (is_signed)
4095
*is_signed = false;
4096
switch (btf_kind(t)) {
4097
case BTF_KIND_INT: {
4098
int enc = btf_int_encoding(t);
4099
4100
if (enc & BTF_INT_BOOL)
4101
return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
4102
if (is_signed)
4103
*is_signed = enc & BTF_INT_SIGNED;
4104
if (t->size == 1)
4105
return KCFG_CHAR;
4106
if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
4107
return KCFG_UNKNOWN;
4108
return KCFG_INT;
4109
}
4110
case BTF_KIND_ENUM:
4111
if (t->size != 4)
4112
return KCFG_UNKNOWN;
4113
if (strcmp(name, "libbpf_tristate"))
4114
return KCFG_UNKNOWN;
4115
return KCFG_TRISTATE;
4116
case BTF_KIND_ENUM64:
4117
if (strcmp(name, "libbpf_tristate"))
4118
return KCFG_UNKNOWN;
4119
return KCFG_TRISTATE;
4120
case BTF_KIND_ARRAY:
4121
if (btf_array(t)->nelems == 0)
4122
return KCFG_UNKNOWN;
4123
if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
4124
return KCFG_UNKNOWN;
4125
return KCFG_CHAR_ARR;
4126
default:
4127
return KCFG_UNKNOWN;
4128
}
4129
}
4130
4131
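/* Illustrative sketch (not part of libbpf itself; the CONFIG_* names and the
 * __kconfig attribute from bpf_helpers.h are assumed examples): how typical
 * BPF-side extern declarations classify under find_kcfg_type() above:
 *
 *	extern int CONFIG_HZ __kconfig;                        // KCFG_INT
 *	extern _Bool CONFIG_BPF_SYSCALL __kconfig;             // KCFG_BOOL
 *	extern enum libbpf_tristate CONFIG_BPF_LSM __kconfig;  // KCFG_TRISTATE
 *	extern char CONFIG_LOCALVERSION[64] __kconfig;         // KCFG_CHAR_ARR
 */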
static int cmp_externs(const void *_a, const void *_b)
4132
{
4133
const struct extern_desc *a = _a;
4134
const struct extern_desc *b = _b;
4135
4136
if (a->type != b->type)
4137
return a->type < b->type ? -1 : 1;
4138
4139
if (a->type == EXT_KCFG) {
4140
/* descending order by alignment requirements */
4141
if (a->kcfg.align != b->kcfg.align)
4142
return a->kcfg.align > b->kcfg.align ? -1 : 1;
4143
/* ascending order by size, within same alignment class */
4144
if (a->kcfg.sz != b->kcfg.sz)
4145
return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
4146
}
4147
4148
/* resolve ties by name */
4149
return strcmp(a->name, b->name);
4150
}
4151
4152
static int find_int_btf_id(const struct btf *btf)
4153
{
4154
const struct btf_type *t;
4155
int i, n;
4156
4157
n = btf__type_cnt(btf);
4158
for (i = 1; i < n; i++) {
4159
t = btf__type_by_id(btf, i);
4160
4161
if (btf_is_int(t) && btf_int_bits(t) == 32)
4162
return i;
4163
}
4164
4165
return 0;
4166
}
4167
4168
static int add_dummy_ksym_var(struct btf *btf)
4169
{
4170
int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
4171
const struct btf_var_secinfo *vs;
4172
const struct btf_type *sec;
4173
4174
if (!btf)
4175
return 0;
4176
4177
sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
4178
BTF_KIND_DATASEC);
4179
if (sec_btf_id < 0)
4180
return 0;
4181
4182
sec = btf__type_by_id(btf, sec_btf_id);
4183
vs = btf_var_secinfos(sec);
4184
for (i = 0; i < btf_vlen(sec); i++, vs++) {
4185
const struct btf_type *vt;
4186
4187
vt = btf__type_by_id(btf, vs->type);
4188
if (btf_is_func(vt))
4189
break;
4190
}
4191
4192
/* No func in ksyms sec. No need to add dummy var. */
4193
if (i == btf_vlen(sec))
4194
return 0;
4195
4196
int_btf_id = find_int_btf_id(btf);
4197
dummy_var_btf_id = btf__add_var(btf,
4198
"dummy_ksym",
4199
BTF_VAR_GLOBAL_ALLOCATED,
4200
int_btf_id);
4201
if (dummy_var_btf_id < 0)
4202
pr_warn("cannot create a dummy_ksym var\n");
4203
4204
return dummy_var_btf_id;
4205
}
4206
4207
static int bpf_object__collect_externs(struct bpf_object *obj)
4208
{
4209
struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
4210
const struct btf_type *t;
4211
struct extern_desc *ext;
4212
int i, n, off, dummy_var_btf_id;
4213
const char *ext_name, *sec_name;
4214
size_t ext_essent_len;
4215
Elf_Scn *scn;
4216
Elf64_Shdr *sh;
4217
4218
if (!obj->efile.symbols)
4219
return 0;
4220
4221
scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
4222
sh = elf_sec_hdr(obj, scn);
4223
if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
4224
return -LIBBPF_ERRNO__FORMAT;
4225
4226
dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
4227
if (dummy_var_btf_id < 0)
4228
return dummy_var_btf_id;
4229
4230
n = sh->sh_size / sh->sh_entsize;
4231
pr_debug("looking for externs among %d symbols...\n", n);
4232
4233
for (i = 0; i < n; i++) {
4234
Elf64_Sym *sym = elf_sym_by_idx(obj, i);
4235
4236
if (!sym)
4237
return -LIBBPF_ERRNO__FORMAT;
4238
if (!sym_is_extern(sym))
4239
continue;
4240
ext_name = elf_sym_str(obj, sym->st_name);
4241
if (!ext_name || !ext_name[0])
4242
continue;
4243
4244
ext = obj->externs;
4245
ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
4246
if (!ext)
4247
return -ENOMEM;
4248
obj->externs = ext;
4249
ext = &ext[obj->nr_extern];
4250
memset(ext, 0, sizeof(*ext));
4251
obj->nr_extern++;
4252
4253
ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
4254
if (ext->btf_id <= 0) {
4255
pr_warn("failed to find BTF for extern '%s': %d\n",
4256
ext_name, ext->btf_id);
4257
return ext->btf_id;
4258
}
4259
t = btf__type_by_id(obj->btf, ext->btf_id);
4260
ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off));
4261
if (!ext->name)
4262
return -ENOMEM;
4263
ext->sym_idx = i;
4264
ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
4265
4266
ext_essent_len = bpf_core_essential_name_len(ext->name);
4267
ext->essent_name = NULL;
4268
if (ext_essent_len != strlen(ext->name)) {
4269
ext->essent_name = strndup(ext->name, ext_essent_len);
4270
if (!ext->essent_name)
4271
return -ENOMEM;
4272
}
4273
4274
ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
4275
if (ext->sec_btf_id <= 0) {
4276
pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
4277
ext_name, ext->btf_id, ext->sec_btf_id);
4278
return ext->sec_btf_id;
4279
}
4280
sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
4281
sec_name = btf__name_by_offset(obj->btf, sec->name_off);
4282
4283
if (strcmp(sec_name, KCONFIG_SEC) == 0) {
4284
if (btf_is_func(t)) {
4285
pr_warn("extern function %s is unsupported under %s section\n",
4286
ext->name, KCONFIG_SEC);
4287
return -ENOTSUP;
4288
}
4289
kcfg_sec = sec;
4290
ext->type = EXT_KCFG;
4291
ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
4292
if (ext->kcfg.sz <= 0) {
4293
pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
4294
ext_name, ext->kcfg.sz);
4295
return ext->kcfg.sz;
4296
}
4297
ext->kcfg.align = btf__align_of(obj->btf, t->type);
4298
if (ext->kcfg.align <= 0) {
4299
pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
4300
ext_name, ext->kcfg.align);
4301
return -EINVAL;
4302
}
4303
ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
4304
&ext->kcfg.is_signed);
4305
if (ext->kcfg.type == KCFG_UNKNOWN) {
4306
pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
4307
return -ENOTSUP;
4308
}
4309
} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
4310
ksym_sec = sec;
4311
ext->type = EXT_KSYM;
4312
skip_mods_and_typedefs(obj->btf, t->type,
4313
&ext->ksym.type_id);
4314
} else {
4315
pr_warn("unrecognized extern section '%s'\n", sec_name);
4316
return -ENOTSUP;
4317
}
4318
}
4319
pr_debug("collected %d externs total\n", obj->nr_extern);
4320
4321
if (!obj->nr_extern)
4322
return 0;
4323
4324
/* sort externs by type, for kcfg ones also by (align, size, name) */
4325
qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
4326
4327
/* for .ksyms section, we need to turn all externs into allocated
4328
* variables in BTF to pass kernel verification; we do this by
4329
* pretending that each extern is an int-sized variable
4330
*/
4331
if (ksym_sec) {
4332
/* find existing 4-byte integer type in BTF to use for fake
4333
* extern variables in DATASEC
4334
*/
4335
int int_btf_id = find_int_btf_id(obj->btf);
4336
/* For extern functions, the dummy_var added earlier
4337
* will be used to replace the vs->type and
4338
* its name string will be used to fill in
4339
* any missing param names.
4340
*/
4341
const struct btf_type *dummy_var;
4342
4343
dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
4344
for (i = 0; i < obj->nr_extern; i++) {
4345
ext = &obj->externs[i];
4346
if (ext->type != EXT_KSYM)
4347
continue;
4348
pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
4349
i, ext->sym_idx, ext->name);
4350
}
4351
4352
sec = ksym_sec;
4353
n = btf_vlen(sec);
4354
for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
4355
struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4356
struct btf_type *vt;
4357
4358
vt = (void *)btf__type_by_id(obj->btf, vs->type);
4359
ext_name = btf__name_by_offset(obj->btf, vt->name_off);
4360
ext = find_extern_by_name(obj, ext_name);
4361
if (!ext) {
4362
pr_warn("failed to find extern definition for BTF %s '%s'\n",
4363
btf_kind_str(vt), ext_name);
4364
return -ESRCH;
4365
}
4366
if (btf_is_func(vt)) {
4367
const struct btf_type *func_proto;
4368
struct btf_param *param;
4369
int j;
4370
4371
func_proto = btf__type_by_id(obj->btf,
4372
vt->type);
4373
param = btf_params(func_proto);
4374
/* Reuse the dummy_var string if the
4375
* func proto does not have a param name.
4376
*/
4377
for (j = 0; j < btf_vlen(func_proto); j++)
4378
if (param[j].type && !param[j].name_off)
4379
param[j].name_off =
4380
dummy_var->name_off;
4381
vs->type = dummy_var_btf_id;
4382
vt->info &= ~0xffff;
4383
vt->info |= BTF_FUNC_GLOBAL;
4384
} else {
4385
btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4386
vt->type = int_btf_id;
4387
}
4388
vs->offset = off;
4389
vs->size = sizeof(int);
4390
}
4391
sec->size = off;
4392
}
4393
4394
if (kcfg_sec) {
4395
sec = kcfg_sec;
4396
/* for kcfg externs calculate their offsets within a .kconfig map */
4397
off = 0;
4398
for (i = 0; i < obj->nr_extern; i++) {
4399
ext = &obj->externs[i];
4400
if (ext->type != EXT_KCFG)
4401
continue;
4402
4403
ext->kcfg.data_off = roundup(off, ext->kcfg.align);
4404
off = ext->kcfg.data_off + ext->kcfg.sz;
4405
pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
4406
i, ext->sym_idx, ext->kcfg.data_off, ext->name);
4407
}
4408
sec->size = off;
4409
n = btf_vlen(sec);
4410
for (i = 0; i < n; i++) {
4411
struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4412
4413
t = btf__type_by_id(obj->btf, vs->type);
4414
ext_name = btf__name_by_offset(obj->btf, t->name_off);
4415
ext = find_extern_by_name(obj, ext_name);
4416
if (!ext) {
4417
pr_warn("failed to find extern definition for BTF var '%s'\n",
4418
ext_name);
4419
return -ESRCH;
4420
}
4421
btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4422
vs->offset = ext->kcfg.data_off;
4423
}
4424
}
4425
return 0;
4426
}
4427
4428
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
4429
{
4430
return prog->sec_idx == obj->efile.text_shndx;
4431
}
4432
4433
struct bpf_program *
4434
bpf_object__find_program_by_name(const struct bpf_object *obj,
4435
const char *name)
4436
{
4437
struct bpf_program *prog;
4438
4439
bpf_object__for_each_program(prog, obj) {
4440
if (prog_is_subprog(obj, prog))
4441
continue;
4442
if (!strcmp(prog->name, name))
4443
return prog;
4444
}
4445
return errno = ENOENT, NULL;
4446
}
4447
4448
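/* Usage sketch (illustrative; "handle_exec" is a placeholder program name):
 *
 *	struct bpf_program *prog;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *	if (!prog)
 *		return -errno;	// errno is set to ENOENT when no program matches
 *
 * Note that subprograms living in .text are skipped, as implemented above.
 */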
static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
4449
int shndx)
4450
{
4451
switch (obj->efile.secs[shndx].sec_type) {
4452
case SEC_BSS:
4453
case SEC_DATA:
4454
case SEC_RODATA:
4455
return true;
4456
default:
4457
return false;
4458
}
4459
}
4460
4461
static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
4462
int shndx)
4463
{
4464
return shndx == obj->efile.btf_maps_shndx;
4465
}
4466
4467
static enum libbpf_map_type
4468
bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
4469
{
4470
if (shndx == obj->efile.symbols_shndx)
4471
return LIBBPF_MAP_KCONFIG;
4472
4473
switch (obj->efile.secs[shndx].sec_type) {
4474
case SEC_BSS:
4475
return LIBBPF_MAP_BSS;
4476
case SEC_DATA:
4477
return LIBBPF_MAP_DATA;
4478
case SEC_RODATA:
4479
return LIBBPF_MAP_RODATA;
4480
default:
4481
return LIBBPF_MAP_UNSPEC;
4482
}
4483
}
4484
4485
static int bpf_prog_compute_hash(struct bpf_program *prog)
4486
{
4487
struct bpf_insn *purged;
4488
int i, err = 0;
4489
4490
purged = calloc(prog->insns_cnt, BPF_INSN_SZ);
4491
if (!purged)
4492
return -ENOMEM;
4493
4494
/* If relocations have been done, the map_fd needs to be
4495
* discarded for the digest calculation.
4496
*/
4497
for (i = 0; i < prog->insns_cnt; i++) {
4498
purged[i] = prog->insns[i];
4499
if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
4500
(purged[i].src_reg == BPF_PSEUDO_MAP_FD ||
4501
purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
4502
purged[i].imm = 0;
4503
i++;
4504
if (i >= prog->insns_cnt ||
4505
prog->insns[i].code != 0 ||
4506
prog->insns[i].dst_reg != 0 ||
4507
prog->insns[i].src_reg != 0 ||
4508
prog->insns[i].off != 0) {
4509
err = -EINVAL;
4510
goto out;
4511
}
4512
purged[i] = prog->insns[i];
4513
purged[i].imm = 0;
4514
}
4515
}
4516
libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn),
4517
prog->hash);
4518
out:
4519
free(purged);
4520
return err;
4521
}
4522
4523
static int bpf_program__record_reloc(struct bpf_program *prog,
4524
struct reloc_desc *reloc_desc,
4525
__u32 insn_idx, const char *sym_name,
4526
const Elf64_Sym *sym, const Elf64_Rel *rel)
4527
{
4528
struct bpf_insn *insn = &prog->insns[insn_idx];
4529
size_t map_idx, nr_maps = prog->obj->nr_maps;
4530
struct bpf_object *obj = prog->obj;
4531
__u32 shdr_idx = sym->st_shndx;
4532
enum libbpf_map_type type;
4533
const char *sym_sec_name;
4534
struct bpf_map *map;
4535
4536
if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
4537
pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
4538
prog->name, sym_name, insn_idx, insn->code);
4539
return -LIBBPF_ERRNO__RELOC;
4540
}
4541
4542
if (sym_is_extern(sym)) {
4543
int sym_idx = ELF64_R_SYM(rel->r_info);
4544
int i, n = obj->nr_extern;
4545
struct extern_desc *ext;
4546
4547
for (i = 0; i < n; i++) {
4548
ext = &obj->externs[i];
4549
if (ext->sym_idx == sym_idx)
4550
break;
4551
}
4552
if (i >= n) {
4553
pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
4554
prog->name, sym_name, sym_idx);
4555
return -LIBBPF_ERRNO__RELOC;
4556
}
4557
pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
4558
prog->name, i, ext->name, ext->sym_idx, insn_idx);
4559
if (insn->code == (BPF_JMP | BPF_CALL))
4560
reloc_desc->type = RELO_EXTERN_CALL;
4561
else
4562
reloc_desc->type = RELO_EXTERN_LD64;
4563
reloc_desc->insn_idx = insn_idx;
4564
reloc_desc->ext_idx = i;
4565
return 0;
4566
}
4567
4568
/* sub-program call relocation */
4569
if (is_call_insn(insn)) {
4570
if (insn->src_reg != BPF_PSEUDO_CALL) {
4571
pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
4572
return -LIBBPF_ERRNO__RELOC;
4573
}
4574
/* text_shndx can be 0, if no default "main" program exists */
4575
if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
4576
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4577
pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
4578
prog->name, sym_name, sym_sec_name);
4579
return -LIBBPF_ERRNO__RELOC;
4580
}
4581
if (sym->st_value % BPF_INSN_SZ) {
4582
pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
4583
prog->name, sym_name, (size_t)sym->st_value);
4584
return -LIBBPF_ERRNO__RELOC;
4585
}
4586
reloc_desc->type = RELO_CALL;
4587
reloc_desc->insn_idx = insn_idx;
4588
reloc_desc->sym_off = sym->st_value;
4589
return 0;
4590
}
4591
4592
if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
4593
pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
4594
prog->name, sym_name, shdr_idx);
4595
return -LIBBPF_ERRNO__RELOC;
4596
}
4597
4598
/* loading subprog addresses */
4599
if (sym_is_subprog(sym, obj->efile.text_shndx)) {
4600
/* global_func: sym->st_value = offset in the section, insn->imm = 0.
4601
* local_func: sym->st_value = 0, insn->imm = offset in the section.
4602
*/
4603
if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
4604
pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
4605
prog->name, sym_name, (size_t)sym->st_value, insn->imm);
4606
return -LIBBPF_ERRNO__RELOC;
4607
}
4608
4609
reloc_desc->type = RELO_SUBPROG_ADDR;
4610
reloc_desc->insn_idx = insn_idx;
4611
reloc_desc->sym_off = sym->st_value;
4612
return 0;
4613
}
4614
4615
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
4616
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4617
4618
/* arena data relocation */
4619
if (shdr_idx == obj->efile.arena_data_shndx) {
4620
if (obj->arena_map_idx < 0) {
4621
pr_warn("prog '%s': bad arena data relocation at insn %u, no arena maps defined\n",
4622
prog->name, insn_idx);
4623
return -LIBBPF_ERRNO__RELOC;
4624
}
4625
reloc_desc->type = RELO_DATA;
4626
reloc_desc->insn_idx = insn_idx;
4627
reloc_desc->map_idx = obj->arena_map_idx;
4628
reloc_desc->sym_off = sym->st_value;
4629
4630
map = &obj->maps[obj->arena_map_idx];
4631
pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n",
4632
prog->name, obj->arena_map_idx, map->name, map->sec_idx,
4633
map->sec_offset, insn_idx);
4634
return 0;
4635
}
4636
4637
/* generic map reference relocation */
4638
if (type == LIBBPF_MAP_UNSPEC) {
4639
if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
4640
pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
4641
prog->name, sym_name, sym_sec_name);
4642
return -LIBBPF_ERRNO__RELOC;
4643
}
4644
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4645
map = &obj->maps[map_idx];
4646
if (map->libbpf_type != type ||
4647
map->sec_idx != sym->st_shndx ||
4648
map->sec_offset != sym->st_value)
4649
continue;
4650
pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
4651
prog->name, map_idx, map->name, map->sec_idx,
4652
map->sec_offset, insn_idx);
4653
break;
4654
}
4655
if (map_idx >= nr_maps) {
4656
pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
4657
prog->name, sym_sec_name, (size_t)sym->st_value);
4658
return -LIBBPF_ERRNO__RELOC;
4659
}
4660
reloc_desc->type = RELO_LD64;
4661
reloc_desc->insn_idx = insn_idx;
4662
reloc_desc->map_idx = map_idx;
4663
reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
4664
return 0;
4665
}
4666
4667
/* global data map relocation */
4668
if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
4669
pr_warn("prog '%s': bad data relo against section '%s'\n",
4670
prog->name, sym_sec_name);
4671
return -LIBBPF_ERRNO__RELOC;
4672
}
4673
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4674
map = &obj->maps[map_idx];
4675
if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
4676
continue;
4677
pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
4678
prog->name, map_idx, map->name, map->sec_idx,
4679
map->sec_offset, insn_idx);
4680
break;
4681
}
4682
if (map_idx >= nr_maps) {
4683
pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
4684
prog->name, sym_sec_name);
4685
return -LIBBPF_ERRNO__RELOC;
4686
}
4687
4688
reloc_desc->type = RELO_DATA;
4689
reloc_desc->insn_idx = insn_idx;
4690
reloc_desc->map_idx = map_idx;
4691
reloc_desc->sym_off = sym->st_value;
4692
return 0;
4693
}
4694
4695
static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
4696
{
4697
return insn_idx >= prog->sec_insn_off &&
4698
insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
4699
}
4700
4701
static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
4702
size_t sec_idx, size_t insn_idx)
4703
{
4704
int l = 0, r = obj->nr_programs - 1, m;
4705
struct bpf_program *prog;
4706
4707
if (!obj->nr_programs)
4708
return NULL;
4709
4710
while (l < r) {
4711
m = l + (r - l + 1) / 2;
4712
prog = &obj->programs[m];
4713
4714
if (prog->sec_idx < sec_idx ||
4715
(prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
4716
l = m;
4717
else
4718
r = m - 1;
4719
}
4720
/* matching program could be at index l, but it still might be the
4721
* wrong one, so we need to double check conditions for the last time
4722
*/
4723
prog = &obj->programs[l];
4724
if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
4725
return prog;
4726
return NULL;
4727
}
4728
4729
static int
4730
bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
4731
{
4732
const char *relo_sec_name, *sec_name;
4733
size_t sec_idx = shdr->sh_info, sym_idx;
4734
struct bpf_program *prog;
4735
struct reloc_desc *relos;
4736
int err, i, nrels;
4737
const char *sym_name;
4738
__u32 insn_idx;
4739
Elf_Scn *scn;
4740
Elf_Data *scn_data;
4741
Elf64_Sym *sym;
4742
Elf64_Rel *rel;
4743
4744
if (sec_idx >= obj->efile.sec_cnt)
4745
return -EINVAL;
4746
4747
scn = elf_sec_by_idx(obj, sec_idx);
4748
scn_data = elf_sec_data(obj, scn);
4749
if (!scn_data)
4750
return -LIBBPF_ERRNO__FORMAT;
4751
4752
relo_sec_name = elf_sec_str(obj, shdr->sh_name);
4753
sec_name = elf_sec_name(obj, scn);
4754
if (!relo_sec_name || !sec_name)
4755
return -EINVAL;
4756
4757
pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
4758
relo_sec_name, sec_idx, sec_name);
4759
nrels = shdr->sh_size / shdr->sh_entsize;
4760
4761
for (i = 0; i < nrels; i++) {
4762
rel = elf_rel_by_idx(data, i);
4763
if (!rel) {
4764
pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
4765
return -LIBBPF_ERRNO__FORMAT;
4766
}
4767
4768
sym_idx = ELF64_R_SYM(rel->r_info);
4769
sym = elf_sym_by_idx(obj, sym_idx);
4770
if (!sym) {
4771
pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
4772
relo_sec_name, sym_idx, i);
4773
return -LIBBPF_ERRNO__FORMAT;
4774
}
4775
4776
if (sym->st_shndx >= obj->efile.sec_cnt) {
4777
pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
4778
relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
4779
return -LIBBPF_ERRNO__FORMAT;
4780
}
4781
4782
if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
4783
pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
4784
relo_sec_name, (size_t)rel->r_offset, i);
4785
return -LIBBPF_ERRNO__FORMAT;
4786
}
4787
4788
insn_idx = rel->r_offset / BPF_INSN_SZ;
4789
/* relocations against static functions are recorded as
4790
* relocations against the section that contains a function;
4791
* in such case, symbol will be STT_SECTION and sym.st_name
4792
* will point to empty string (0), so fetch section name
4793
* instead
4794
*/
4795
if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
4796
sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
4797
else
4798
sym_name = elf_sym_str(obj, sym->st_name);
4799
sym_name = sym_name ?: "<?";
4800
4801
pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
4802
relo_sec_name, i, insn_idx, sym_name);
4803
4804
prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
4805
if (!prog) {
4806
pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
4807
relo_sec_name, i, sec_name, insn_idx);
4808
continue;
4809
}
4810
4811
relos = libbpf_reallocarray(prog->reloc_desc,
4812
prog->nr_reloc + 1, sizeof(*relos));
4813
if (!relos)
4814
return -ENOMEM;
4815
prog->reloc_desc = relos;
4816
4817
/* adjust insn_idx to local BPF program frame of reference */
4818
insn_idx -= prog->sec_insn_off;
4819
err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
4820
insn_idx, sym_name, sym, rel);
4821
if (err)
4822
return err;
4823
4824
prog->nr_reloc++;
4825
}
4826
return 0;
4827
}
4828
4829
static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
4830
{
4831
int id;
4832
4833
if (!obj->btf)
4834
return -ENOENT;
4835
4836
/* if it's a BTF-defined map, we don't need to search for type IDs.
4837
* A struct_ops map does not need btf_key_type_id or
4838
* btf_value_type_id.
4839
*/
4840
if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
4841
return 0;
4842
4843
/*
4844
* LLVM annotates global data differently in BTF, that is,
4845
* only as '.data', '.bss' or '.rodata'.
4846
*/
4847
if (!bpf_map__is_internal(map))
4848
return -ENOENT;
4849
4850
id = btf__find_by_name(obj->btf, map->real_name);
4851
if (id < 0)
4852
return id;
4853
4854
map->btf_key_type_id = 0;
4855
map->btf_value_type_id = id;
4856
return 0;
4857
}
4858
4859
static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
4860
{
4861
char file[PATH_MAX], buff[4096];
4862
FILE *fp;
4863
__u32 val;
4864
int err;
4865
4866
snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
4867
memset(info, 0, sizeof(*info));
4868
4869
fp = fopen(file, "re");
4870
if (!fp) {
4871
err = -errno;
4872
pr_warn("failed to open %s: %s. No procfs support?\n", file,
4873
errstr(err));
4874
return err;
4875
}
4876
4877
while (fgets(buff, sizeof(buff), fp)) {
4878
if (sscanf(buff, "map_type:\t%u", &val) == 1)
4879
info->type = val;
4880
else if (sscanf(buff, "key_size:\t%u", &val) == 1)
4881
info->key_size = val;
4882
else if (sscanf(buff, "value_size:\t%u", &val) == 1)
4883
info->value_size = val;
4884
else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
4885
info->max_entries = val;
4886
else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
4887
info->map_flags = val;
4888
}
4889
4890
fclose(fp);
4891
4892
return 0;
4893
}
4894
4895
static bool map_is_created(const struct bpf_map *map)
4896
{
4897
return map->obj->state >= OBJ_PREPARED || map->reused;
4898
}
4899
4900
bool bpf_map__autocreate(const struct bpf_map *map)
4901
{
4902
return map->autocreate;
4903
}
4904
4905
int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
4906
{
4907
if (map_is_created(map))
4908
return libbpf_err(-EBUSY);
4909
4910
map->autocreate = autocreate;
4911
return 0;
4912
}
4913
4914
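/* Usage sketch (illustrative; "optional_stats" is a placeholder map name):
 * opt out of creating a map between open and load. Once the object is
 * prepared/loaded (or the map's FD is reused), this returns -EBUSY.
 *
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, "optional_stats");
 *
 *	if (m)
 *		bpf_map__set_autocreate(m, false);
 */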
int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach)
4915
{
4916
if (!bpf_map__is_struct_ops(map))
4917
return libbpf_err(-EINVAL);
4918
4919
map->autoattach = autoattach;
4920
return 0;
4921
}
4922
4923
bool bpf_map__autoattach(const struct bpf_map *map)
4924
{
4925
return map->autoattach;
4926
}
4927
4928
int bpf_map__reuse_fd(struct bpf_map *map, int fd)
4929
{
4930
struct bpf_map_info info;
4931
__u32 len = sizeof(info), name_len;
4932
int new_fd, err;
4933
char *new_name;
4934
4935
memset(&info, 0, len);
4936
err = bpf_map_get_info_by_fd(fd, &info, &len);
4937
if (err && errno == EINVAL)
4938
err = bpf_get_map_info_from_fdinfo(fd, &info);
4939
if (err)
4940
return libbpf_err(err);
4941
4942
name_len = strlen(info.name);
4943
if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
4944
new_name = strdup(map->name);
4945
else
4946
new_name = strdup(info.name);
4947
4948
if (!new_name)
4949
return libbpf_err(-errno);
4950
4951
/*
4952
* Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
4953
* This is similar to what we do in ensure_good_fd(), but without
4954
* closing original FD.
4955
*/
4956
new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
4957
if (new_fd < 0) {
4958
err = -errno;
4959
goto err_free_new_name;
4960
}
4961
4962
err = reuse_fd(map->fd, new_fd);
4963
if (err)
4964
goto err_free_new_name;
4965
4966
free(map->name);
4967
4968
map->name = new_name;
4969
map->def.type = info.type;
4970
map->def.key_size = info.key_size;
4971
map->def.value_size = info.value_size;
4972
map->def.max_entries = info.max_entries;
4973
map->def.map_flags = info.map_flags;
4974
map->btf_key_type_id = info.btf_key_type_id;
4975
map->btf_value_type_id = info.btf_value_type_id;
4976
map->reused = true;
4977
map->map_extra = info.map_extra;
4978
4979
return 0;
4980
4981
err_free_new_name:
4982
free(new_name);
4983
return libbpf_err(err);
4984
}
4985
4986
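/* Usage sketch (illustrative; the pin path is a placeholder). This mirrors
 * what bpf_object__reuse_map() below does for maps with a pin_path set:
 *
 *	int err, pin_fd;
 *
 *	pin_fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *	if (pin_fd >= 0) {
 *		err = bpf_map__reuse_fd(map, pin_fd);
 *		close(pin_fd);	// bpf_map__reuse_fd() dup()s the FD internally
 *	}
 */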
__u32 bpf_map__max_entries(const struct bpf_map *map)
4987
{
4988
return map->def.max_entries;
4989
}
4990
4991
struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
4992
{
4993
if (!bpf_map_type__is_map_in_map(map->def.type))
4994
return errno = EINVAL, NULL;
4995
4996
return map->inner_map;
4997
}
4998
4999
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
5000
{
5001
if (map_is_created(map))
5002
return libbpf_err(-EBUSY);
5003
5004
map->def.max_entries = max_entries;
5005
5006
/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
5007
if (map_is_ringbuf(map))
5008
map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
5009
5010
return 0;
5011
}
5012
5013
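/* Usage sketch (illustrative; map name and size are placeholders): resize a
 * ring buffer between open and load. As noted above, ringbuf max_entries may
 * be auto-adjusted to satisfy page size requirements.
 *
 *	struct bpf_map *rb = bpf_object__find_map_by_name(obj, "events");
 *
 *	if (rb)
 *		bpf_map__set_max_entries(rb, 256 * 1024);
 */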
static int bpf_object_prepare_token(struct bpf_object *obj)
5014
{
5015
const char *bpffs_path;
5016
int bpffs_fd = -1, token_fd, err;
5017
bool mandatory;
5018
enum libbpf_print_level level;
5019
5020
/* token is explicitly prevented */
5021
if (obj->token_path && obj->token_path[0] == '\0') {
5022
pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
5023
return 0;
5024
}
5025
5026
mandatory = obj->token_path != NULL;
5027
level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;
5028
5029
bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
5030
bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR);
5031
if (bpffs_fd < 0) {
5032
err = -errno;
5033
__pr(level, "object '%s': failed (%s) to open BPF FS mount at '%s'%s\n",
5034
obj->name, errstr(err), bpffs_path,
5035
mandatory ? "" : ", skipping optional step...");
5036
return mandatory ? err : 0;
5037
}
5038
5039
token_fd = bpf_token_create(bpffs_fd, 0);
5040
close(bpffs_fd);
5041
if (token_fd < 0) {
5042
if (!mandatory && token_fd == -ENOENT) {
5043
pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
5044
obj->name, bpffs_path);
5045
return 0;
5046
}
5047
__pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
5048
obj->name, token_fd, bpffs_path,
5049
mandatory ? "" : ", skipping optional step...");
5050
return mandatory ? token_fd : 0;
5051
}
5052
5053
obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
5054
if (!obj->feat_cache) {
5055
close(token_fd);
5056
return -ENOMEM;
5057
}
5058
5059
obj->token_fd = token_fd;
5060
obj->feat_cache->token_fd = token_fd;
5061
5062
return 0;
5063
}
5064
5065
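/* Usage sketch (illustrative; paths are examples only): the token path
 * consumed here typically comes from the bpf_token_path open option. Setting
 * it explicitly makes the token mandatory; an empty string disables it.
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.bpf_token_path = "/sys/fs/bpf",
 *	);
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 */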
static int
5066
bpf_object__probe_loading(struct bpf_object *obj)
5067
{
5068
struct bpf_insn insns[] = {
5069
BPF_MOV64_IMM(BPF_REG_0, 0),
5070
BPF_EXIT_INSN(),
5071
};
5072
int ret, insn_cnt = ARRAY_SIZE(insns);
5073
LIBBPF_OPTS(bpf_prog_load_opts, opts,
5074
.token_fd = obj->token_fd,
5075
.prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0,
5076
);
5077
5078
if (obj->gen_loader)
5079
return 0;
5080
5081
ret = bump_rlimit_memlock();
5082
if (ret)
5083
pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %s), you might need to do it explicitly!\n",
5084
errstr(ret));
5085
5086
/* make sure basic loading works */
5087
ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
5088
if (ret < 0)
5089
ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
5090
if (ret < 0) {
5091
ret = errno;
5092
pr_warn("Error in %s(): %s. Couldn't load trivial BPF program. Make sure your kernel supports BPF (CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is set to big enough value.\n",
5093
__func__, errstr(ret));
5094
return -ret;
5095
}
5096
close(ret);
5097
5098
return 0;
5099
}
5100
5101
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
5102
{
5103
if (obj->gen_loader)
5104
/* To generate loader program assume the latest kernel
5105
* to avoid doing extra prog_load, map_create syscalls.
5106
*/
5107
return true;
5108
5109
if (obj->token_fd)
5110
return feat_supported(obj->feat_cache, feat_id);
5111
5112
return feat_supported(NULL, feat_id);
5113
}
5114
5115
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
5116
{
5117
struct bpf_map_info map_info;
5118
__u32 map_info_len = sizeof(map_info);
5119
int err;
5120
5121
memset(&map_info, 0, map_info_len);
5122
err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
5123
if (err && errno == EINVAL)
5124
err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
5125
if (err) {
5126
pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
5127
errstr(err));
5128
return false;
5129
}
5130
5131
/*
5132
* bpf_map_get_info_by_fd() for DEVMAP will always return flags with
5133
* BPF_F_RDONLY_PROG set, but it generally is not set at map creation time.
5134
* Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from
5135
* bpf_map_get_info_by_fd() when checking for compatibility with an
5136
* existing DEVMAP.
5137
*/
5138
if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH)
5139
map_info.map_flags &= ~BPF_F_RDONLY_PROG;
5140
5141
return (map_info.type == map->def.type &&
5142
map_info.key_size == map->def.key_size &&
5143
map_info.value_size == map->def.value_size &&
5144
map_info.max_entries == map->def.max_entries &&
5145
map_info.map_flags == map->def.map_flags &&
5146
map_info.map_extra == map->map_extra);
5147
}
5148
5149
static int
5150
bpf_object__reuse_map(struct bpf_map *map)
5151
{
5152
int err, pin_fd;
5153
5154
pin_fd = bpf_obj_get(map->pin_path);
5155
if (pin_fd < 0) {
5156
err = -errno;
5157
if (err == -ENOENT) {
5158
pr_debug("found no pinned map to reuse at '%s'\n",
5159
map->pin_path);
5160
return 0;
5161
}
5162
5163
pr_warn("couldn't retrieve pinned map '%s': %s\n",
5164
map->pin_path, errstr(err));
5165
return err;
5166
}
5167
5168
if (!map_is_reuse_compat(map, pin_fd)) {
5169
pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
5170
map->pin_path);
5171
close(pin_fd);
5172
return -EINVAL;
5173
}
5174
5175
err = bpf_map__reuse_fd(map, pin_fd);
5176
close(pin_fd);
5177
if (err)
5178
return err;
5179
5180
map->pinned = true;
5181
pr_debug("reused pinned map at '%s'\n", map->pin_path);
5182
5183
return 0;
5184
}
5185
5186
static int
5187
bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
5188
{
5189
enum libbpf_map_type map_type = map->libbpf_type;
5190
int err, zero = 0;
5191
size_t mmap_sz;
5192
5193
if (obj->gen_loader) {
5194
bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
5195
map->mmaped, map->def.value_size);
5196
if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
5197
bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
5198
return 0;
5199
}
5200
5201
err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
5202
if (err) {
5203
err = -errno;
5204
pr_warn("map '%s': failed to set initial contents: %s\n",
5205
bpf_map__name(map), errstr(err));
5206
return err;
5207
}
5208
5209
/* Freeze .rodata and .kconfig map as read-only from syscall side. */
5210
if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
5211
err = bpf_map_freeze(map->fd);
5212
if (err) {
5213
err = -errno;
5214
pr_warn("map '%s': failed to freeze as read-only: %s\n",
5215
bpf_map__name(map), errstr(err));
5216
return err;
5217
}
5218
}
5219
5220
/* Remap anonymous mmap()-ed "map initialization image" as
5221
* a BPF map-backed mmap()-ed memory, but preserving the same
5222
* memory address. This will cause the kernel to change the process'
5223
* page table to point to a different piece of kernel memory,
5224
* but from the userspace point of view the memory address (and its
5225
* contents, being identical at this point) will stay the
5226
* same. This mapping will be released by bpf_object__close()
5227
* as per normal clean up procedure.
5228
*/
5229
mmap_sz = bpf_map_mmap_sz(map);
5230
if (map->def.map_flags & BPF_F_MMAPABLE) {
5231
void *mmaped;
5232
int prot;
5233
5234
if (map->def.map_flags & BPF_F_RDONLY_PROG)
5235
prot = PROT_READ;
5236
else
5237
prot = PROT_READ | PROT_WRITE;
5238
mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map->fd, 0);
5239
if (mmaped == MAP_FAILED) {
5240
err = -errno;
5241
pr_warn("map '%s': failed to re-mmap() contents: %s\n",
5242
bpf_map__name(map), errstr(err));
5243
return err;
5244
}
5245
map->mmaped = mmaped;
5246
} else if (map->mmaped) {
5247
munmap(map->mmaped, mmap_sz);
5248
map->mmaped = NULL;
5249
}
5250
5251
return 0;
5252
}
5253
5254
static void bpf_map__destroy(struct bpf_map *map);
5255
5256
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
5257
{
5258
LIBBPF_OPTS(bpf_map_create_opts, create_attr);
5259
struct bpf_map_def *def = &map->def;
5260
const char *map_name = NULL;
5261
int err = 0, map_fd;
5262
5263
if (kernel_supports(obj, FEAT_PROG_NAME))
5264
map_name = map->name;
5265
create_attr.map_ifindex = map->map_ifindex;
5266
create_attr.map_flags = def->map_flags;
5267
create_attr.numa_node = map->numa_node;
5268
create_attr.map_extra = map->map_extra;
5269
create_attr.token_fd = obj->token_fd;
5270
if (obj->token_fd)
5271
create_attr.map_flags |= BPF_F_TOKEN_FD;
5272
if (map->excl_prog) {
5273
err = bpf_prog_compute_hash(map->excl_prog);
5274
if (err)
5275
return err;
5276
5277
create_attr.excl_prog_hash = map->excl_prog->hash;
5278
create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH;
5279
}
5280
5281
if (bpf_map__is_struct_ops(map)) {
5282
create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
5283
if (map->mod_btf_fd >= 0) {
5284
create_attr.value_type_btf_obj_fd = map->mod_btf_fd;
5285
create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD;
5286
}
5287
}
5288
5289
if (obj->btf && btf__fd(obj->btf) >= 0) {
5290
create_attr.btf_fd = btf__fd(obj->btf);
5291
create_attr.btf_key_type_id = map->btf_key_type_id;
5292
create_attr.btf_value_type_id = map->btf_value_type_id;
5293
}
5294
5295
if (bpf_map_type__is_map_in_map(def->type)) {
5296
if (map->inner_map) {
5297
err = map_set_def_max_entries(map->inner_map);
5298
if (err)
5299
return err;
5300
err = bpf_object__create_map(obj, map->inner_map, true);
5301
if (err) {
5302
pr_warn("map '%s': failed to create inner map: %s\n",
5303
map->name, errstr(err));
5304
return err;
5305
}
5306
map->inner_map_fd = map->inner_map->fd;
5307
}
5308
if (map->inner_map_fd >= 0)
5309
create_attr.inner_map_fd = map->inner_map_fd;
5310
}
5311
5312
switch (def->type) {
5313
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5314
case BPF_MAP_TYPE_CGROUP_ARRAY:
5315
case BPF_MAP_TYPE_STACK_TRACE:
5316
case BPF_MAP_TYPE_ARRAY_OF_MAPS:
5317
case BPF_MAP_TYPE_HASH_OF_MAPS:
5318
case BPF_MAP_TYPE_DEVMAP:
5319
case BPF_MAP_TYPE_DEVMAP_HASH:
5320
case BPF_MAP_TYPE_CPUMAP:
5321
case BPF_MAP_TYPE_XSKMAP:
5322
case BPF_MAP_TYPE_SOCKMAP:
5323
case BPF_MAP_TYPE_SOCKHASH:
5324
case BPF_MAP_TYPE_QUEUE:
5325
case BPF_MAP_TYPE_STACK:
5326
case BPF_MAP_TYPE_ARENA:
5327
create_attr.btf_fd = 0;
5328
create_attr.btf_key_type_id = 0;
5329
create_attr.btf_value_type_id = 0;
5330
map->btf_key_type_id = 0;
5331
map->btf_value_type_id = 0;
5332
break;
5333
case BPF_MAP_TYPE_STRUCT_OPS:
5334
create_attr.btf_value_type_id = 0;
5335
break;
5336
default:
5337
break;
5338
}
5339
5340
if (obj->gen_loader) {
5341
bpf_gen__map_create(obj->gen_loader, def->type, map_name,
5342
def->key_size, def->value_size, def->max_entries,
5343
&create_attr, is_inner ? -1 : map - obj->maps);
5344
/* We keep pretending we have a valid FD to pass various fd >= 0
5345
* checks by just keeping original placeholder FDs in place.
5346
* See bpf_object__add_map() comment.
5347
* This placeholder fd will not be used with any syscall and
5348
* will be reset to -1 eventually.
5349
*/
5350
map_fd = map->fd;
5351
} else {
5352
map_fd = bpf_map_create(def->type, map_name,
5353
def->key_size, def->value_size,
5354
def->max_entries, &create_attr);
5355
}
5356
if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
5357
err = -errno;
5358
pr_warn("Error in bpf_create_map_xattr(%s): %s. Retrying without BTF.\n",
5359
map->name, errstr(err));
5360
create_attr.btf_fd = 0;
5361
create_attr.btf_key_type_id = 0;
5362
create_attr.btf_value_type_id = 0;
5363
map->btf_key_type_id = 0;
5364
map->btf_value_type_id = 0;
5365
map_fd = bpf_map_create(def->type, map_name,
5366
def->key_size, def->value_size,
5367
def->max_entries, &create_attr);
5368
}
5369
5370
if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
5371
if (obj->gen_loader)
5372
map->inner_map->fd = -1;
5373
bpf_map__destroy(map->inner_map);
5374
zfree(&map->inner_map);
5375
}
5376
5377
if (map_fd < 0)
5378
return map_fd;
5379
5380
/* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
5381
if (map->fd == map_fd)
5382
return 0;
5383
5384
/* Keep placeholder FD value but now point it to the BPF map object.
5385
* This way everything that relied on this map's FD (e.g., relocated
5386
* ldimm64 instructions) will stay valid and won't need adjustments.
5387
* map->fd stays valid but now points to what map_fd points to.
5388
*/
5389
return reuse_fd(map->fd, map_fd);
5390
}
5391
5392
static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
5393
{
5394
const struct bpf_map *targ_map;
5395
unsigned int i;
5396
int fd, err = 0;
5397
5398
for (i = 0; i < map->init_slots_sz; i++) {
5399
if (!map->init_slots[i])
5400
continue;
5401
5402
targ_map = map->init_slots[i];
5403
fd = targ_map->fd;
5404
5405
if (obj->gen_loader) {
5406
bpf_gen__populate_outer_map(obj->gen_loader,
5407
map - obj->maps, i,
5408
targ_map - obj->maps);
5409
} else {
5410
err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5411
}
5412
if (err) {
5413
err = -errno;
5414
pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %s\n",
5415
map->name, i, targ_map->name, fd, errstr(err));
5416
return err;
5417
}
5418
pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
5419
map->name, i, targ_map->name, fd);
5420
}
5421
5422
zfree(&map->init_slots);
5423
map->init_slots_sz = 0;
5424
5425
return 0;
5426
}
5427
5428
static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
5429
{
5430
const struct bpf_program *targ_prog;
5431
unsigned int i;
5432
int fd, err;
5433
5434
if (obj->gen_loader)
5435
return -ENOTSUP;
5436
5437
for (i = 0; i < map->init_slots_sz; i++) {
5438
if (!map->init_slots[i])
5439
continue;
5440
5441
targ_prog = map->init_slots[i];
5442
fd = bpf_program__fd(targ_prog);
5443
5444
err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5445
if (err) {
5446
err = -errno;
5447
pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %s\n",
5448
map->name, i, targ_prog->name, fd, errstr(err));
5449
return err;
5450
}
5451
pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
5452
map->name, i, targ_prog->name, fd);
5453
}
5454
5455
zfree(&map->init_slots);
5456
map->init_slots_sz = 0;
5457
5458
return 0;
5459
}
5460
5461
static int bpf_object_init_prog_arrays(struct bpf_object *obj)
5462
{
5463
struct bpf_map *map;
5464
int i, err;
5465
5466
for (i = 0; i < obj->nr_maps; i++) {
5467
map = &obj->maps[i];
5468
5469
if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
5470
continue;
5471
5472
err = init_prog_array_slots(obj, map);
5473
if (err < 0)
5474
return err;
5475
}
5476
return 0;
5477
}
5478
5479
static int map_set_def_max_entries(struct bpf_map *map)
5480
{
5481
if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
5482
int nr_cpus;
5483
5484
nr_cpus = libbpf_num_possible_cpus();
5485
if (nr_cpus < 0) {
5486
pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
5487
map->name, nr_cpus);
5488
return nr_cpus;
5489
}
5490
pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
5491
map->def.max_entries = nr_cpus;
5492
}
5493
5494
return 0;
5495
}
5496
5497
static int
5498
bpf_object__create_maps(struct bpf_object *obj)
5499
{
5500
struct bpf_map *map;
5501
unsigned int i, j;
5502
int err;
5503
bool retried;
5504
5505
for (i = 0; i < obj->nr_maps; i++) {
5506
map = &obj->maps[i];
5507
5508
/* To support old kernels, we skip creating global data maps
5509
* (.rodata, .data, .kconfig, etc); later on, during program
5510
* loading, if we detect that at least one of the to-be-loaded
5511
* programs is referencing any global data map, we'll error
5512
* out with program name and relocation index logged.
5513
* This approach makes it possible to accommodate Clang emitting
5514
* unnecessary .rodata.str1.1 sections for string literals,
5515
* but it also allows CO-RE applications that use
5516
* global variables in some BPF programs, but not others.
5517
* If those global variable-using programs are not loaded at
5518
* runtime due to bpf_program__set_autoload(prog, false),
5519
* bpf_object loading will succeed just fine even on old
5520
* kernels.
5521
*/
5522
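/* Illustrative sketch ("uses_globals" is a placeholder program name): this is
 * the bpf_program__set_autoload() escape hatch mentioned above, applied by
 * the application between open and load:
 *
 *	struct bpf_program *p = bpf_object__find_program_by_name(obj, "uses_globals");
 *
 *	if (p)
 *		bpf_program__set_autoload(p, false);
 */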
if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
5523
map->autocreate = false;
5524
5525
if (!map->autocreate) {
5526
pr_debug("map '%s': skipped auto-creating...\n", map->name);
5527
continue;
5528
}
5529
5530
err = map_set_def_max_entries(map);
5531
if (err)
5532
goto err_out;
5533
5534
retried = false;
5535
retry:
5536
if (map->pin_path) {
5537
err = bpf_object__reuse_map(map);
5538
if (err) {
5539
pr_warn("map '%s': error reusing pinned map\n",
5540
map->name);
5541
goto err_out;
5542
}
5543
if (retried && map->fd < 0) {
5544
pr_warn("map '%s': cannot find pinned map\n",
5545
map->name);
5546
err = -ENOENT;
5547
goto err_out;
5548
}
5549
}
5550
5551
if (map->reused) {
5552
pr_debug("map '%s': skipping creation (preset fd=%d)\n",
5553
map->name, map->fd);
5554
} else {
5555
err = bpf_object__create_map(obj, map, false);
5556
if (err)
5557
goto err_out;
5558
5559
pr_debug("map '%s': created successfully, fd=%d\n",
5560
map->name, map->fd);
5561
5562
if (bpf_map__is_internal(map)) {
5563
err = bpf_object__populate_internal_map(obj, map);
5564
if (err < 0)
5565
goto err_out;
5566
} else if (map->def.type == BPF_MAP_TYPE_ARENA) {
5567
map->mmaped = mmap((void *)(long)map->map_extra,
5568
bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
5569
map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
5570
map->fd, 0);
5571
if (map->mmaped == MAP_FAILED) {
5572
err = -errno;
5573
map->mmaped = NULL;
5574
pr_warn("map '%s': failed to mmap arena: %s\n",
5575
map->name, errstr(err));
5576
return err;
5577
}
5578
if (obj->arena_data) {
5579
memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
5580
zfree(&obj->arena_data);
5581
}
5582
}
5583
if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
5584
err = init_map_in_map_slots(obj, map);
5585
if (err < 0)
5586
goto err_out;
5587
}
5588
}
5589
5590
if (map->pin_path && !map->pinned) {
5591
err = bpf_map__pin(map, NULL);
5592
if (err) {
5593
if (!retried && err == -EEXIST) {
5594
retried = true;
5595
goto retry;
5596
}
5597
pr_warn("map '%s': failed to auto-pin at '%s': %s\n",
5598
map->name, map->pin_path, errstr(err));
5599
goto err_out;
5600
}
5601
}
5602
}
5603
5604
return 0;
5605
5606
err_out:
5607
pr_warn("map '%s': failed to create: %s\n", map->name, errstr(err));
5608
pr_perm_msg(err);
5609
for (j = 0; j < i; j++)
5610
zclose(obj->maps[j].fd);
5611
return err;
5612
}
5613
5614
static bool bpf_core_is_flavor_sep(const char *s)
5615
{
5616
/* check X___Y name pattern, where X and Y are not underscores */
5617
return s[0] != '_' && /* X */
5618
s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */
5619
s[4] != '_'; /* Y */
5620
}
5621
5622
/* Given 'some_struct_name___with_flavor' return the length of a name prefix
5623
* before last triple underscore. Struct name part after last triple
5624
* underscore is ignored by BPF CO-RE relocation during relocation matching.
5625
*/
5626
size_t bpf_core_essential_name_len(const char *name)
5627
{
5628
size_t n = strlen(name);
5629
int i;
5630
5631
for (i = n - 5; i >= 0; i--) {
5632
if (bpf_core_is_flavor_sep(name + i))
5633
return i + 1;
5634
}
5635
return n;
5636
}
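
/* Illustrative sketch (example values, not part of libbpf): for a flavored
 * local type name the essential length covers only the part before the last
 * "___" separator, e.g.
 *
 *	size_t n = bpf_core_essential_name_len("task_struct___old");
 *	// n == 11, i.e. strlen("task_struct"); the "___old" flavor suffix
 *	// is ignored when matching candidates against target BTF
 */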
5637
5638
void bpf_core_free_cands(struct bpf_core_cand_list *cands)
5639
{
5640
if (!cands)
5641
return;
5642
5643
free(cands->cands);
5644
free(cands);
5645
}
5646
5647
int bpf_core_add_cands(struct bpf_core_cand *local_cand,
5648
size_t local_essent_len,
5649
const struct btf *targ_btf,
5650
const char *targ_btf_name,
5651
int targ_start_id,
5652
struct bpf_core_cand_list *cands)
5653
{
5654
struct bpf_core_cand *new_cands, *cand;
5655
const struct btf_type *t, *local_t;
5656
const char *targ_name, *local_name;
5657
size_t targ_essent_len;
5658
int n, i;
5659
5660
local_t = btf__type_by_id(local_cand->btf, local_cand->id);
5661
local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
5662
5663
n = btf__type_cnt(targ_btf);
5664
for (i = targ_start_id; i < n; i++) {
5665
t = btf__type_by_id(targ_btf, i);
5666
if (!btf_kind_core_compat(t, local_t))
5667
continue;
5668
5669
targ_name = btf__name_by_offset(targ_btf, t->name_off);
5670
if (str_is_empty(targ_name))
5671
continue;
5672
5673
targ_essent_len = bpf_core_essential_name_len(targ_name);
5674
if (targ_essent_len != local_essent_len)
5675
continue;
5676
5677
if (strncmp(local_name, targ_name, local_essent_len) != 0)
5678
continue;
5679
5680
pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5681
local_cand->id, btf_kind_str(local_t),
5682
local_name, i, btf_kind_str(t), targ_name,
5683
targ_btf_name);
5684
new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5685
sizeof(*cands->cands));
5686
if (!new_cands)
5687
return -ENOMEM;
5688
5689
cand = &new_cands[cands->len];
5690
cand->btf = targ_btf;
5691
cand->id = i;
5692
5693
cands->cands = new_cands;
5694
cands->len++;
5695
}
5696
return 0;
5697
}
5698
5699
static int load_module_btfs(struct bpf_object *obj)
5700
{
5701
struct bpf_btf_info info;
5702
struct module_btf *mod_btf;
5703
struct btf *btf;
5704
char name[64];
5705
__u32 id = 0, len;
5706
int err, fd;
5707
5708
if (obj->btf_modules_loaded)
5709
return 0;
5710
5711
if (obj->gen_loader)
5712
return 0;
5713
5714
/* don't do this again, even if we find no module BTFs */
5715
obj->btf_modules_loaded = true;
5716
5717
/* kernel too old to support module BTFs */
5718
if (!kernel_supports(obj, FEAT_MODULE_BTF))
5719
return 0;
5720
5721
while (true) {
5722
err = bpf_btf_get_next_id(id, &id);
5723
if (err && errno == ENOENT)
5724
return 0;
5725
if (err && errno == EPERM) {
5726
pr_debug("skipping module BTFs loading, missing privileges\n");
5727
return 0;
5728
}
5729
if (err) {
5730
err = -errno;
5731
pr_warn("failed to iterate BTF objects: %s\n", errstr(err));
5732
return err;
5733
}
5734
5735
fd = bpf_btf_get_fd_by_id(id);
5736
if (fd < 0) {
5737
if (errno == ENOENT)
5738
continue; /* expected race: BTF was unloaded */
5739
err = -errno;
5740
pr_warn("failed to get BTF object #%d FD: %s\n", id, errstr(err));
5741
return err;
5742
}
5743
5744
len = sizeof(info);
5745
memset(&info, 0, sizeof(info));
5746
info.name = ptr_to_u64(name);
5747
info.name_len = sizeof(name);
5748
5749
err = bpf_btf_get_info_by_fd(fd, &info, &len);
5750
if (err) {
5751
err = -errno;
5752
pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err));
5753
goto err_out;
5754
}
5755
5756
/* ignore non-module BTFs */
5757
if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5758
close(fd);
5759
continue;
5760
}
5761
5762
btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5763
err = libbpf_get_error(btf);
5764
if (err) {
5765
pr_warn("failed to load module [%s]'s BTF object #%d: %s\n",
5766
name, id, errstr(err));
5767
goto err_out;
5768
}
5769
5770
err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5771
sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5772
if (err)
5773
goto err_out;
5774
5775
mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5776
5777
mod_btf->btf = btf;
5778
mod_btf->id = id;
5779
mod_btf->fd = fd;
5780
mod_btf->name = strdup(name);
5781
if (!mod_btf->name) {
5782
err = -ENOMEM;
5783
goto err_out;
5784
}
5785
continue;
5786
5787
err_out:
5788
close(fd);
5789
return err;
5790
}
5791
5792
return 0;
5793
}
5794
5795
static struct bpf_core_cand_list *
5796
bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5797
{
5798
struct bpf_core_cand local_cand = {};
5799
struct bpf_core_cand_list *cands;
5800
const struct btf *main_btf;
5801
const struct btf_type *local_t;
5802
const char *local_name;
5803
size_t local_essent_len;
5804
int err, i;
5805
5806
local_cand.btf = local_btf;
5807
local_cand.id = local_type_id;
5808
local_t = btf__type_by_id(local_btf, local_type_id);
5809
if (!local_t)
5810
return ERR_PTR(-EINVAL);
5811
5812
local_name = btf__name_by_offset(local_btf, local_t->name_off);
5813
if (str_is_empty(local_name))
5814
return ERR_PTR(-EINVAL);
5815
local_essent_len = bpf_core_essential_name_len(local_name);
5816
5817
cands = calloc(1, sizeof(*cands));
5818
if (!cands)
5819
return ERR_PTR(-ENOMEM);
5820
5821
/* Attempt to find target candidates in vmlinux BTF first */
5822
main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5823
err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5824
if (err)
5825
goto err_out;
5826
5827
/* if vmlinux BTF has any candidate, don't go for module BTFs */
5828
if (cands->len)
5829
return cands;
5830
5831
/* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5832
if (obj->btf_vmlinux_override)
5833
return cands;
5834
5835
/* now look through module BTFs, trying to still find candidates */
5836
err = load_module_btfs(obj);
5837
if (err)
5838
goto err_out;
5839
5840
for (i = 0; i < obj->btf_module_cnt; i++) {
5841
err = bpf_core_add_cands(&local_cand, local_essent_len,
5842
obj->btf_modules[i].btf,
5843
obj->btf_modules[i].name,
5844
btf__type_cnt(obj->btf_vmlinux),
5845
cands);
5846
if (err)
5847
goto err_out;
5848
}
5849
5850
return cands;
5851
err_out:
5852
bpf_core_free_cands(cands);
5853
return ERR_PTR(err);
5854
}
5855
5856
/* Check local and target types for compatibility. This check is used for
5857
* type-based CO-RE relocations and follows slightly different rules than
5858
* field-based relocations. This function assumes that root types were already
5859
* checked for name match. Beyond that initial root-level name check, names
5860
* are completely ignored. Compatibility rules are as follows:
5861
* - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5862
* kind should match for local and target types (i.e., STRUCT is not
5863
* compatible with UNION);
5864
* - for ENUMs, the size is ignored;
5865
* - for INT, size and signedness are ignored;
5866
* - for ARRAY, dimensionality is ignored, element types are checked for
5867
* compatibility recursively;
5868
* - CONST/VOLATILE/RESTRICT modifiers are ignored;
5869
* - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5870
* - FUNC_PROTOs are compatible if they have compatible signature: same
5871
* number of input args and compatible return and argument types.
5872
* These rules are not set in stone and probably will be adjusted as we get
5873
* more experience with using BPF CO-RE relocations.
5874
*/
5875
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5876
const struct btf *targ_btf, __u32 targ_id)
5877
{
5878
return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5879
}
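
/* Illustrative sketch of the rules above (example types, not from any real
 * BTF):
 *
 *	local: struct s { ... };  target: struct s { ... };  -> compatible
 *	local: struct s { ... };  target: union  s { ... };  -> not compatible (kind differs)
 *	local: int (4 bytes);     target: long (8 bytes);    -> compatible (INT size/signedness ignored)
 *	local: int a[4];          target: int a[16];         -> compatible (ARRAY dimensionality ignored)
 */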
5880
5881
int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5882
const struct btf *targ_btf, __u32 targ_id)
5883
{
5884
return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5885
}
5886
5887
static size_t bpf_core_hash_fn(const long key, void *ctx)
5888
{
5889
return key;
5890
}
5891
5892
static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
5893
{
5894
return k1 == k2;
5895
}
5896
5897
static int record_relo_core(struct bpf_program *prog,
5898
const struct bpf_core_relo *core_relo, int insn_idx)
5899
{
5900
struct reloc_desc *relos, *relo;
5901
5902
relos = libbpf_reallocarray(prog->reloc_desc,
5903
prog->nr_reloc + 1, sizeof(*relos));
5904
if (!relos)
5905
return -ENOMEM;
5906
relo = &relos[prog->nr_reloc];
5907
relo->type = RELO_CORE;
5908
relo->insn_idx = insn_idx;
5909
relo->core_relo = core_relo;
5910
prog->reloc_desc = relos;
5911
prog->nr_reloc++;
5912
return 0;
5913
}
5914
5915
static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5916
{
5917
struct reloc_desc *relo;
5918
int i;
5919
5920
for (i = 0; i < prog->nr_reloc; i++) {
5921
relo = &prog->reloc_desc[i];
5922
if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5923
continue;
5924
5925
return relo->core_relo;
5926
}
5927
5928
return NULL;
5929
}
5930
5931
static int bpf_core_resolve_relo(struct bpf_program *prog,
5932
const struct bpf_core_relo *relo,
5933
int relo_idx,
5934
const struct btf *local_btf,
5935
struct hashmap *cand_cache,
5936
struct bpf_core_relo_res *targ_res)
5937
{
5938
struct bpf_core_spec specs_scratch[3] = {};
5939
struct bpf_core_cand_list *cands = NULL;
5940
const char *prog_name = prog->name;
5941
const struct btf_type *local_type;
5942
const char *local_name;
5943
__u32 local_id = relo->type_id;
5944
int err;
5945
5946
local_type = btf__type_by_id(local_btf, local_id);
5947
if (!local_type)
5948
return -EINVAL;
5949
5950
local_name = btf__name_by_offset(local_btf, local_type->name_off);
5951
if (!local_name)
5952
return -EINVAL;
5953
5954
if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
5955
!hashmap__find(cand_cache, local_id, &cands)) {
5956
cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5957
if (IS_ERR(cands)) {
5958
pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5959
prog_name, relo_idx, local_id, btf_kind_str(local_type),
5960
local_name, PTR_ERR(cands));
5961
return PTR_ERR(cands);
5962
}
5963
err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
5964
if (err) {
5965
bpf_core_free_cands(cands);
5966
return err;
5967
}
5968
}
5969
5970
return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
5971
targ_res);
5972
}
5973
5974
static int
5975
bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5976
{
5977
const struct btf_ext_info_sec *sec;
5978
struct bpf_core_relo_res targ_res;
5979
const struct bpf_core_relo *rec;
5980
const struct btf_ext_info *seg;
5981
struct hashmap_entry *entry;
5982
struct hashmap *cand_cache = NULL;
5983
struct bpf_program *prog;
5984
struct bpf_insn *insn;
5985
const char *sec_name;
5986
int i, err = 0, insn_idx, sec_idx, sec_num;
5987
5988
if (obj->btf_ext->core_relo_info.len == 0)
5989
return 0;
5990
5991
if (targ_btf_path) {
5992
obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
5993
err = libbpf_get_error(obj->btf_vmlinux_override);
5994
if (err) {
5995
pr_warn("failed to parse target BTF: %s\n", errstr(err));
5996
return err;
5997
}
5998
}
5999
6000
cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
6001
if (IS_ERR(cand_cache)) {
6002
err = PTR_ERR(cand_cache);
6003
goto out;
6004
}
6005
6006
seg = &obj->btf_ext->core_relo_info;
6007
sec_num = 0;
6008
for_each_btf_ext_sec(seg, sec) {
6009
sec_idx = seg->sec_idxs[sec_num];
6010
sec_num++;
6011
6012
sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
6013
if (str_is_empty(sec_name)) {
6014
err = -EINVAL;
6015
goto out;
6016
}
6017
6018
pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
6019
6020
for_each_btf_ext_rec(seg, sec, i, rec) {
6021
if (rec->insn_off % BPF_INSN_SZ)
6022
return -EINVAL;
6023
insn_idx = rec->insn_off / BPF_INSN_SZ;
6024
prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
6025
if (!prog) {
6026
/* When __weak subprog is "overridden" by another instance
6027
* of the subprog from a different object file, linker still
6028
* appends all the .BTF.ext info that used to belong to that
6029
* eliminated subprogram.
6030
* This is similar to what x86-64 linker does for relocations.
6031
* So ignore such relocations, just like we ignore
6032
* subprog instructions when discovering subprograms.
6033
*/
6034
pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
6035
sec_name, i, insn_idx);
6036
continue;
6037
}
6038
/* no need to apply CO-RE relocation if the program is
6039
* not going to be loaded
6040
*/
6041
if (!prog->autoload)
6042
continue;
6043
6044
/* adjust insn_idx from section frame of reference to the local
6045
* program's frame of reference; (sub-)program code is not yet
6046
* relocated, so it's enough to just subtract in-section offset
6047
*/
6048
insn_idx = insn_idx - prog->sec_insn_off;
6049
if (insn_idx >= prog->insns_cnt)
6050
return -EINVAL;
6051
insn = &prog->insns[insn_idx];
6052
6053
err = record_relo_core(prog, rec, insn_idx);
6054
if (err) {
6055
pr_warn("prog '%s': relo #%d: failed to record relocation: %s\n",
6056
prog->name, i, errstr(err));
6057
goto out;
6058
}
6059
6060
if (prog->obj->gen_loader)
6061
continue;
6062
6063
err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
6064
if (err) {
6065
pr_warn("prog '%s': relo #%d: failed to relocate: %s\n",
6066
prog->name, i, errstr(err));
6067
goto out;
6068
}
6069
6070
err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
6071
if (err) {
6072
pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %s\n",
6073
prog->name, i, insn_idx, errstr(err));
6074
goto out;
6075
}
6076
}
6077
}
6078
6079
out:
6080
/* obj->btf_vmlinux and module BTFs are freed after object load */
6081
btf__free(obj->btf_vmlinux_override);
6082
obj->btf_vmlinux_override = NULL;
6083
6084
if (!IS_ERR_OR_NULL(cand_cache)) {
6085
hashmap__for_each_entry(cand_cache, entry, i) {
6086
bpf_core_free_cands(entry->pvalue);
6087
}
6088
hashmap__free(cand_cache);
6089
}
6090
return err;
6091
}
6092
6093
/* base map load ldimm64 special constant, used also for log fixup logic */
6094
#define POISON_LDIMM64_MAP_BASE 2001000000
6095
#define POISON_LDIMM64_MAP_PFX "200100"
6096
6097
static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
6098
int insn_idx, struct bpf_insn *insn,
6099
int map_idx, const struct bpf_map *map)
6100
{
6101
int i;
6102
6103
pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
6104
prog->name, relo_idx, insn_idx, map_idx, map->name);
6105
6106
/* we turn single ldimm64 into two identical invalid calls */
6107
for (i = 0; i < 2; i++) {
6108
insn->code = BPF_JMP | BPF_CALL;
6109
insn->dst_reg = 0;
6110
insn->src_reg = 0;
6111
insn->off = 0;
6112
/* if this instruction is reachable (not dead code),
6113
* verifier will complain with something like:
6114
* invalid func unknown#2001000123
6115
* where lower 123 is map index into obj->maps[] array
6116
*/
6117
insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;
6118
6119
insn++;
6120
}
6121
}
6122
6123
/* unresolved kfunc call special constant, used also for log fixup logic */
6124
#define POISON_CALL_KFUNC_BASE 2002000000
6125
#define POISON_CALL_KFUNC_PFX "2002"
6126
6127
static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
6128
int insn_idx, struct bpf_insn *insn,
6129
int ext_idx, const struct extern_desc *ext)
6130
{
6131
pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
6132
prog->name, relo_idx, insn_idx, ext->name);
6133
6134
/* we turn kfunc call into invalid helper call with identifiable constant */
6135
insn->code = BPF_JMP | BPF_CALL;
6136
insn->dst_reg = 0;
6137
insn->src_reg = 0;
6138
insn->off = 0;
6139
/* if this instruction is reachable (not dead code),
6140
* verifier will complain with something like:
6141
* invalid func unknown#2002000123
6142
* where lower 123 is extern index into obj->externs[] array
6143
*/
6144
insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
6145
}
6146
6147
/* Relocate data references within program code:
6148
* - map references;
6149
* - global variable references;
6150
* - extern references.
6151
*/
6152
static int
6153
bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
6154
{
6155
int i;
6156
6157
for (i = 0; i < prog->nr_reloc; i++) {
6158
struct reloc_desc *relo = &prog->reloc_desc[i];
6159
struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6160
const struct bpf_map *map;
6161
struct extern_desc *ext;
6162
6163
switch (relo->type) {
6164
case RELO_LD64:
6165
map = &obj->maps[relo->map_idx];
6166
if (obj->gen_loader) {
6167
insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
6168
insn[0].imm = relo->map_idx;
6169
} else if (map->autocreate) {
6170
insn[0].src_reg = BPF_PSEUDO_MAP_FD;
6171
insn[0].imm = map->fd;
6172
} else {
6173
poison_map_ldimm64(prog, i, relo->insn_idx, insn,
6174
relo->map_idx, map);
6175
}
6176
break;
6177
case RELO_DATA:
6178
map = &obj->maps[relo->map_idx];
6179
insn[1].imm = insn[0].imm + relo->sym_off;
6180
if (obj->gen_loader) {
6181
insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6182
insn[0].imm = relo->map_idx;
6183
} else if (map->autocreate) {
6184
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6185
insn[0].imm = map->fd;
6186
} else {
6187
poison_map_ldimm64(prog, i, relo->insn_idx, insn,
6188
relo->map_idx, map);
6189
}
6190
break;
6191
case RELO_EXTERN_LD64:
6192
ext = &obj->externs[relo->ext_idx];
6193
if (ext->type == EXT_KCFG) {
6194
if (obj->gen_loader) {
6195
insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6196
insn[0].imm = obj->kconfig_map_idx;
6197
} else {
6198
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6199
insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
6200
}
6201
insn[1].imm = ext->kcfg.data_off;
6202
} else /* EXT_KSYM */ {
6203
if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
6204
insn[0].src_reg = BPF_PSEUDO_BTF_ID;
6205
insn[0].imm = ext->ksym.kernel_btf_id;
6206
insn[1].imm = ext->ksym.kernel_btf_obj_fd;
6207
} else { /* typeless ksyms or unresolved typed ksyms */
6208
insn[0].imm = (__u32)ext->ksym.addr;
6209
insn[1].imm = ext->ksym.addr >> 32;
6210
}
6211
}
6212
break;
6213
case RELO_EXTERN_CALL:
6214
ext = &obj->externs[relo->ext_idx];
6215
insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
6216
if (ext->is_set) {
6217
insn[0].imm = ext->ksym.kernel_btf_id;
6218
insn[0].off = ext->ksym.btf_fd_idx;
6219
} else { /* unresolved weak kfunc call */
6220
poison_kfunc_call(prog, i, relo->insn_idx, insn,
6221
relo->ext_idx, ext);
6222
}
6223
break;
6224
case RELO_SUBPROG_ADDR:
6225
if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
6226
pr_warn("prog '%s': relo #%d: bad insn\n",
6227
prog->name, i);
6228
return -EINVAL;
6229
}
6230
/* handled already */
6231
break;
6232
case RELO_CALL:
6233
/* handled already */
6234
break;
6235
case RELO_CORE:
6236
/* will be handled by bpf_program_record_relos() */
6237
break;
6238
default:
6239
pr_warn("prog '%s': relo #%d: bad relo type %d\n",
6240
prog->name, i, relo->type);
6241
return -EINVAL;
6242
}
6243
}
6244
6245
return 0;
6246
}
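
/* Illustrative sketch (made-up numbers, not part of libbpf): a reference to
 * a global variable at offset 16 inside the .data map compiles into a
 * two-instruction ldimm64; after the RELO_DATA case above it ends up as
 *
 *	insn[0]: BPF_LD | BPF_DW | BPF_IMM, src_reg = BPF_PSEUDO_MAP_VALUE,
 *	         imm = <fd of the .data map>
 *	insn[1]: imm = 16 (the variable's offset within the map value)
 */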
6247
6248
static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
6249
const struct bpf_program *prog,
6250
const struct btf_ext_info *ext_info,
6251
void **prog_info, __u32 *prog_rec_cnt,
6252
__u32 *prog_rec_sz)
6253
{
6254
void *copy_start = NULL, *copy_end = NULL;
6255
void *rec, *rec_end, *new_prog_info;
6256
const struct btf_ext_info_sec *sec;
6257
size_t old_sz, new_sz;
6258
int i, sec_num, sec_idx, off_adj;
6259
6260
sec_num = 0;
6261
for_each_btf_ext_sec(ext_info, sec) {
6262
sec_idx = ext_info->sec_idxs[sec_num];
6263
sec_num++;
6264
if (prog->sec_idx != sec_idx)
6265
continue;
6266
6267
for_each_btf_ext_rec(ext_info, sec, i, rec) {
6268
__u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
6269
6270
if (insn_off < prog->sec_insn_off)
6271
continue;
6272
if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
6273
break;
6274
6275
if (!copy_start)
6276
copy_start = rec;
6277
copy_end = rec + ext_info->rec_size;
6278
}
6279
6280
if (!copy_start)
6281
return -ENOENT;
6282
6283
/* append func/line info of a given (sub-)program to the main
6284
* program func/line info
6285
*/
6286
old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
6287
new_sz = old_sz + (copy_end - copy_start);
6288
new_prog_info = realloc(*prog_info, new_sz);
6289
if (!new_prog_info)
6290
return -ENOMEM;
6291
*prog_info = new_prog_info;
6292
*prog_rec_cnt = new_sz / ext_info->rec_size;
6293
memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
6294
6295
/* Kernel instruction offsets are in units of 8-byte
6296
* instructions, while .BTF.ext instruction offsets generated
6297
* by Clang are in units of bytes. So convert Clang offsets
6298
* into kernel offsets and adjust offset according to program
6299
* relocated position.
6300
*/
6301
off_adj = prog->sub_insn_off - prog->sec_insn_off;
6302
rec = new_prog_info + old_sz;
6303
rec_end = new_prog_info + new_sz;
6304
for (; rec < rec_end; rec += ext_info->rec_size) {
6305
__u32 *insn_off = rec;
6306
6307
*insn_off = *insn_off / BPF_INSN_SZ + off_adj;
6308
}
6309
*prog_rec_sz = ext_info->rec_size;
6310
return 0;
6311
}
6312
6313
return -ENOENT;
6314
}
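
/* Illustrative sketch (made-up numbers): a func/line info record with a
 * Clang byte offset of 128 first becomes 128 / BPF_INSN_SZ = 16, i.e. the
 * 16th instruction within its ELF section; if that subprog starts at
 * sec_insn_off = 16 and was appended into the main program at
 * sub_insn_off = 100, then off_adj = 100 - 16 = 84 and the final offset
 * recorded for the kernel is 16 + 84 = 100.
 */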
6315
6316
static int
6317
reloc_prog_func_and_line_info(const struct bpf_object *obj,
6318
struct bpf_program *main_prog,
6319
const struct bpf_program *prog)
6320
{
6321
int err;
6322
6323
/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
6324
* support func/line info
6325
*/
6326
if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
6327
return 0;
6328
6329
/* only attempt func info relocation if main program's func_info
6330
* relocation was successful
6331
*/
6332
if (main_prog != prog && !main_prog->func_info)
6333
goto line_info;
6334
6335
err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
6336
&main_prog->func_info,
6337
&main_prog->func_info_cnt,
6338
&main_prog->func_info_rec_size);
6339
if (err) {
6340
if (err != -ENOENT) {
6341
pr_warn("prog '%s': error relocating .BTF.ext function info: %s\n",
6342
prog->name, errstr(err));
6343
return err;
6344
}
6345
if (main_prog->func_info) {
6346
/*
6347
* Some info has already been found but there was a problem
6348
* with the last btf_ext reloc. We must error out.
6349
*/
6350
pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
6351
return err;
6352
}
6353
/* We had a problem loading the very first info. Ignore the rest. */
6354
pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6355
prog->name);
6356
}
6357
6358
line_info:
6359
/* don't relocate line info if main program's relocation failed */
6360
if (main_prog != prog && !main_prog->line_info)
6361
return 0;
6362
6363
err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6364
&main_prog->line_info,
6365
&main_prog->line_info_cnt,
6366
&main_prog->line_info_rec_size);
6367
if (err) {
6368
if (err != -ENOENT) {
6369
pr_warn("prog '%s': error relocating .BTF.ext line info: %s\n",
6370
prog->name, errstr(err));
6371
return err;
6372
}
6373
if (main_prog->line_info) {
6374
/*
6375
* Some info has already been found but there was a problem
6376
* with the last btf_ext reloc. We must error out.
6377
*/
6378
pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6379
return err;
6380
}
6381
/* We had a problem loading the very first info. Ignore the rest. */
6382
pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6383
prog->name);
6384
}
6385
return 0;
6386
}
6387
6388
static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6389
{
6390
size_t insn_idx = *(const size_t *)key;
6391
const struct reloc_desc *relo = elem;
6392
6393
if (insn_idx == relo->insn_idx)
6394
return 0;
6395
return insn_idx < relo->insn_idx ? -1 : 1;
6396
}
6397
6398
static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6399
{
6400
if (!prog->nr_reloc)
6401
return NULL;
6402
return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6403
sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6404
}
6405
6406
static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6407
{
6408
int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6409
struct reloc_desc *relos;
6410
int i;
6411
6412
if (main_prog == subprog)
6413
return 0;
6414
relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6415
/* if new count is zero, reallocarray can return a valid NULL result;
6416
* in this case the previous pointer will be freed, so we *have to*
6417
* reassign old pointer to the new value (even if it's NULL)
6418
*/
6419
if (!relos && new_cnt)
6420
return -ENOMEM;
6421
if (subprog->nr_reloc)
6422
memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6423
sizeof(*relos) * subprog->nr_reloc);
6424
6425
for (i = main_prog->nr_reloc; i < new_cnt; i++)
6426
relos[i].insn_idx += subprog->sub_insn_off;
6427
/* After insn_idx adjustment the 'relos' array is still sorted
6428
* by insn_idx and doesn't break bsearch.
6429
*/
6430
main_prog->reloc_desc = relos;
6431
main_prog->nr_reloc = new_cnt;
6432
return 0;
6433
}
6434
6435
static int
6436
bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
6437
struct bpf_program *subprog)
6438
{
6439
struct bpf_insn *insns;
6440
size_t new_cnt;
6441
int err;
6442
6443
subprog->sub_insn_off = main_prog->insns_cnt;
6444
6445
new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6446
insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6447
if (!insns) {
6448
pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6449
return -ENOMEM;
6450
}
6451
main_prog->insns = insns;
6452
main_prog->insns_cnt = new_cnt;
6453
6454
memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6455
subprog->insns_cnt * sizeof(*insns));
6456
6457
pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6458
main_prog->name, subprog->insns_cnt, subprog->name);
6459
6460
/* The subprog insns are now appended. Append its relos too. */
6461
err = append_subprog_relos(main_prog, subprog);
6462
if (err)
6463
return err;
6464
return 0;
6465
}
6466
6467
static int
6468
bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6469
struct bpf_program *prog)
6470
{
6471
size_t sub_insn_idx, insn_idx;
6472
struct bpf_program *subprog;
6473
struct reloc_desc *relo;
6474
struct bpf_insn *insn;
6475
int err;
6476
6477
err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6478
if (err)
6479
return err;
6480
6481
for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6482
insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6483
if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6484
continue;
6485
6486
relo = find_prog_insn_relo(prog, insn_idx);
6487
if (relo && relo->type == RELO_EXTERN_CALL)
6488
/* kfunc relocations will be handled later
6489
* in bpf_object__relocate_data()
6490
*/
6491
continue;
6492
if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6493
pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6494
prog->name, insn_idx, relo->type);
6495
return -LIBBPF_ERRNO__RELOC;
6496
}
6497
if (relo) {
6498
/* sub-program instruction index is a combination of
6499
* an offset of a symbol pointed to by relocation and
6500
* call instruction's imm field; for global functions,
6501
* call always has imm = -1, but for static functions
6502
* relocation is against STT_SECTION and insn->imm
6503
* points to a start of a static function
6504
*
6505
* for subprog addr relocation, the relo->sym_off + insn->imm is
6506
* the byte offset in the corresponding section.
6507
*/
6508
if (relo->type == RELO_CALL)
6509
sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6510
else
6511
sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6512
} else if (insn_is_pseudo_func(insn)) {
6513
/*
6514
* RELO_SUBPROG_ADDR relo is always emitted even if both
6515
* functions are in the same section, so it shouldn't reach here.
6516
*/
6517
pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6518
prog->name, insn_idx);
6519
return -LIBBPF_ERRNO__RELOC;
6520
} else {
6521
/* if subprogram call is to a static function within
6522
* the same ELF section, there won't be any relocation
6523
* emitted, but it also means there is no additional
6524
* offset necessary, insns->imm is relative to
6525
* instruction's original position within the section
6526
*/
6527
sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6528
}
6529
6530
/* we enforce that sub-programs should be in .text section */
6531
subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6532
if (!subprog) {
6533
pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6534
prog->name);
6535
return -LIBBPF_ERRNO__RELOC;
6536
}
6537
6538
/* if it's the first call instruction calling into this
6539
* subprogram (meaning this subprog hasn't been processed
6540
* yet) within the context of current main program:
6541
* - append it at the end of main program's instructions block;
6542
* - process it recursively, while current program is put on hold;
6543
* - if that subprogram calls some other not yet processed
6544
* subprogram, same thing will happen recursively until
6545
* there are no more unprocessed subprograms left to append
6546
* and relocate.
6547
*/
6548
if (subprog->sub_insn_off == 0) {
6549
err = bpf_object__append_subprog_code(obj, main_prog, subprog);
6550
if (err)
6551
return err;
6552
err = bpf_object__reloc_code(obj, main_prog, subprog);
6553
if (err)
6554
return err;
6555
}
6556
6557
/* main_prog->insns memory could have been re-allocated, so
6558
* calculate pointer again
6559
*/
6560
insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6561
/* calculate correct instruction position within current main
6562
* prog; each main prog can have a different set of
6563
* subprograms appended (potentially in different order as
6564
* well), so position of any subprog can be different for
6565
* different main programs
6566
*/
6567
insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6568
6569
pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6570
prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6571
}
6572
6573
return 0;
6574
}
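
/* Illustrative sketch (made-up numbers): if a call instruction sits at
 * main-prog position prog->sub_insn_off + insn_idx = 40 and the callee was
 * appended at subprog->sub_insn_off = 100, the rewritten call gets
 * imm = 100 - 40 - 1 = 59, i.e. the kernel-style "relative to the next
 * instruction" offset of the subprog's first instruction.
 */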
6575
6576
/*
6577
* Relocate sub-program calls.
6578
*
6579
* Algorithm operates as follows. Each entry-point BPF program (referred to as
6580
* main prog) is processed separately. Each subprog (a non-entry function
6581
* that can be called from either entry progs or other subprogs) gets its
6582
* sub_insn_off reset to zero. This serves as an indicator that this subprogram
6583
* hasn't yet been appended and relocated within current main prog. Once it's
6584
* relocated, sub_insn_off will point at the position within current main prog
6585
* where given subprog was appended. This will further be used to relocate all
6586
* the call instructions jumping into this subprog.
6587
*
6588
* We start with main program and process all call instructions. If the call
6589
* is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6590
* is zero), subprog instructions are appended at the end of main program's
6591
* instruction array. Then main program is "put on hold" while we recursively
6592
* process newly appended subprogram. If that subprogram calls into another
6593
* subprogram that hasn't been appended, new subprogram is appended again to
6594
* the *main* prog's instructions (subprog's instructions are always left
6595
* untouched, as they need to be in unmodified state for subsequent main progs
6596
* and subprog instructions are always sent only as part of a main prog) and
6597
* the process continues recursively. Once all the subprogs called from a main
6598
* prog or any of its subprogs are appended (and relocated), all their
6599
* positions within finalized instructions array are known, so it's easy to
6600
* rewrite call instructions with correct relative offsets, corresponding to
6601
* desired target subprog.
6602
*
6603
* It's important to realize that some subprogs might not be called from some
6604
* main prog and any of its called/used subprogs. Those will keep their
6605
* subprog->sub_insn_off as zero at all times and won't be appended to current
6606
* main prog and won't be relocated within the context of current main prog.
6607
* They might still be used from other main progs later.
6608
*
6609
* Visually this process can be shown as below. Suppose we have two main
6610
* programs mainA and mainB and BPF object contains three subprogs: subA,
6611
* subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6612
* subC both call subB:
6613
*
6614
* +--------+ +-------+
6615
* | v v |
6616
* +--+---+ +--+-+-+ +---+--+
6617
* | subA | | subB | | subC |
6618
* +--+---+ +------+ +---+--+
6619
* ^ ^
6620
* | |
6621
* +---+-------+ +------+----+
6622
* | mainA | | mainB |
6623
* +-----------+ +-----------+
6624
*
6625
* We'll start relocating mainA, will find subA, append it and start
6626
* processing sub A recursively:
6627
*
6628
* +-----------+------+
6629
* | mainA | subA |
6630
* +-----------+------+
6631
*
6632
* At this point we notice that subB is used from subA, so we append it and
6633
* relocate (there are no further subcalls from subB):
6634
*
6635
* +-----------+------+------+
6636
* | mainA | subA | subB |
6637
* +-----------+------+------+
6638
*
6639
* At this point, we relocate subA calls, then go one level up and finish with
6640
* relocating mainA calls. mainA is done.
6641
*
6642
* For mainB process is similar but results in different order. We start with
6643
* mainB and skip subA and subB, as mainB never calls them (at least
6644
* directly), but we see subC is needed, so we append and start processing it:
6645
*
6646
* +-----------+------+
6647
* | mainB | subC |
6648
* +-----------+------+
6649
* Now we see subC needs subB, so we go back to it, append and relocate it:
6650
*
6651
* +-----------+------+------+
6652
* | mainB | subC | subB |
6653
* +-----------+------+------+
6654
*
6655
* At this point we unwind recursion, relocate calls in subC, then in mainB.
6656
*/
6657
static int
6658
bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6659
{
6660
struct bpf_program *subprog;
6661
int i, err;
6662
6663
/* mark all subprogs as not relocated (yet) within the context of
6664
* current main program
6665
*/
6666
for (i = 0; i < obj->nr_programs; i++) {
6667
subprog = &obj->programs[i];
6668
if (!prog_is_subprog(obj, subprog))
6669
continue;
6670
6671
subprog->sub_insn_off = 0;
6672
}
6673
6674
err = bpf_object__reloc_code(obj, prog, prog);
6675
if (err)
6676
return err;
6677
6678
return 0;
6679
}
6680
6681
static void
6682
bpf_object__free_relocs(struct bpf_object *obj)
6683
{
6684
struct bpf_program *prog;
6685
int i;
6686
6687
/* free up relocation descriptors */
6688
for (i = 0; i < obj->nr_programs; i++) {
6689
prog = &obj->programs[i];
6690
zfree(&prog->reloc_desc);
6691
prog->nr_reloc = 0;
6692
}
6693
}
6694
6695
static int cmp_relocs(const void *_a, const void *_b)
6696
{
6697
const struct reloc_desc *a = _a;
6698
const struct reloc_desc *b = _b;
6699
6700
if (a->insn_idx != b->insn_idx)
6701
return a->insn_idx < b->insn_idx ? -1 : 1;
6702
6703
/* no two relocations should have the same insn_idx, but ... */
6704
if (a->type != b->type)
6705
return a->type < b->type ? -1 : 1;
6706
6707
return 0;
6708
}
6709
6710
static void bpf_object__sort_relos(struct bpf_object *obj)
6711
{
6712
int i;
6713
6714
for (i = 0; i < obj->nr_programs; i++) {
6715
struct bpf_program *p = &obj->programs[i];
6716
6717
if (!p->nr_reloc)
6718
continue;
6719
6720
qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6721
}
6722
}
6723
6724
static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog)
6725
{
6726
const char *str = "exception_callback:";
6727
size_t pfx_len = strlen(str);
6728
int i, j, n;
6729
6730
if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG))
6731
return 0;
6732
6733
n = btf__type_cnt(obj->btf);
6734
for (i = 1; i < n; i++) {
6735
const char *name;
6736
struct btf_type *t;
6737
6738
t = btf_type_by_id(obj->btf, i);
6739
if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
6740
continue;
6741
6742
name = btf__str_by_offset(obj->btf, t->name_off);
6743
if (strncmp(name, str, pfx_len) != 0)
6744
continue;
6745
6746
t = btf_type_by_id(obj->btf, t->type);
6747
if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
6748
pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
6749
prog->name);
6750
return -EINVAL;
6751
}
6752
if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0)
6753
continue;
6754
/* Multiple callbacks are specified for the same prog,
6755
* the verifier will eventually return an error for this
6756
* case, hence simply skip appending a subprog.
6757
*/
6758
if (prog->exception_cb_idx >= 0) {
6759
prog->exception_cb_idx = -1;
6760
break;
6761
}
6762
6763
name += pfx_len;
6764
if (str_is_empty(name)) {
6765
pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
6766
prog->name);
6767
return -EINVAL;
6768
}
6769
6770
for (j = 0; j < obj->nr_programs; j++) {
6771
struct bpf_program *subprog = &obj->programs[j];
6772
6773
if (!prog_is_subprog(obj, subprog))
6774
continue;
6775
if (strcmp(name, subprog->name) != 0)
6776
continue;
6777
/* Enforce non-hidden, as from the verifier's point of
6778
* view it expects global functions, whereas
6779
* mark_btf_static fixes up the linkage as static.
6780
*/
6781
if (!subprog->sym_global || subprog->mark_btf_static) {
6782
pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
6783
prog->name, subprog->name);
6784
return -EINVAL;
6785
}
6786
/* Let's see if we already saw a static exception callback with the same name */
6787
if (prog->exception_cb_idx >= 0) {
6788
pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
6789
prog->name, subprog->name);
6790
return -EINVAL;
6791
}
6792
prog->exception_cb_idx = j;
6793
break;
6794
}
6795
6796
if (prog->exception_cb_idx >= 0)
6797
continue;
6798
6799
pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
6800
return -ENOENT;
6801
}
6802
6803
return 0;
6804
}
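
/* Illustrative sketch (assumed BPF-side usage, names are hypothetical): the
 * main program names its exception callback via a BTF decl tag, which the
 * scan above resolves to prog->exception_cb_idx, e.g.
 *
 *	__noinline int my_exc_cb(u64 cookie)	// global, non-hidden subprog
 *	{
 *		return 0;
 *	}
 *
 *	SEC("tc")
 *	__attribute__((btf_decl_tag("exception_callback:my_exc_cb")))
 *	int my_prog(struct __sk_buff *ctx)
 *	{
 *		...
 *	}
 */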
6805
6806
static struct {
6807
enum bpf_prog_type prog_type;
6808
const char *ctx_name;
6809
} global_ctx_map[] = {
6810
{ BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" },
6811
{ BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" },
6812
{ BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" },
6813
{ BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" },
6814
{ BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" },
6815
{ BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" },
6816
{ BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" },
6817
{ BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" },
6818
{ BPF_PROG_TYPE_LWT_IN, "__sk_buff" },
6819
{ BPF_PROG_TYPE_LWT_OUT, "__sk_buff" },
6820
{ BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" },
6821
{ BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" },
6822
{ BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" },
6823
{ BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" },
6824
{ BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" },
6825
{ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
6826
{ BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" },
6827
{ BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" },
6828
{ BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" },
6829
{ BPF_PROG_TYPE_SK_MSG, "sk_msg_md" },
6830
{ BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" },
6831
{ BPF_PROG_TYPE_SK_SKB, "__sk_buff" },
6832
{ BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" },
6833
{ BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" },
6834
{ BPF_PROG_TYPE_XDP, "xdp_md" },
6835
/* all other program types don't have "named" context structs */
6836
};
6837
6838
/* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef,
6839
* for the __builtin_types_compatible_p() checks below;
6840
* with this approach we don't need any extra arch-specific #ifdef guards
6841
*/
6842
struct pt_regs;
6843
struct user_pt_regs;
6844
struct user_regs_struct;
6845
6846
static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
6847
const char *subprog_name, int arg_idx,
6848
int arg_type_id, const char *ctx_name)
6849
{
6850
const struct btf_type *t;
6851
const char *tname;
6852
6853
/* check if existing parameter already matches verifier expectations */
6854
t = skip_mods_and_typedefs(btf, arg_type_id, NULL);
6855
if (!btf_is_ptr(t))
6856
goto out_warn;
6857
6858
/* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe
6859
* and perf_event programs, so check this case early on and forget
6860
* about it for subsequent checks
6861
*/
6862
while (btf_is_mod(t))
6863
t = btf__type_by_id(btf, t->type);
6864
if (btf_is_typedef(t) &&
6865
(prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) {
6866
tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
6867
if (strcmp(tname, "bpf_user_pt_regs_t") == 0)
6868
return false; /* canonical type for kprobe/perf_event */
6869
}
6870
6871
/* now we can ignore typedefs moving forward */
6872
t = skip_mods_and_typedefs(btf, t->type, NULL);
6873
6874
/* if it's `void *`, definitely fix up BTF info */
6875
if (btf_is_void(t))
6876
return true;
6877
6878
/* if it's already proper canonical type, no need to fix up */
6879
tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
6880
if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0)
6881
return false;
6882
6883
/* special cases */
6884
switch (prog->type) {
6885
case BPF_PROG_TYPE_KPROBE:
6886
/* `struct pt_regs *` is expected, but we need to fix up */
6887
if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
6888
return true;
6889
break;
6890
case BPF_PROG_TYPE_PERF_EVENT:
6891
if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
6892
btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
6893
return true;
6894
if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
6895
btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
6896
return true;
6897
if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
6898
btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
6899
return true;
6900
break;
6901
case BPF_PROG_TYPE_RAW_TRACEPOINT:
6902
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
6903
/* allow u64* as ctx */
6904
if (btf_is_int(t) && t->size == 8)
6905
return true;
6906
break;
6907
default:
6908
break;
6909
}
6910
6911
out_warn:
6912
pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n",
6913
prog->name, subprog_name, arg_idx, ctx_name);
6914
return false;
6915
}
6916
6917
static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog)
6918
{
6919
int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
6920
int i, err, arg_cnt, fn_name_off, linkage;
6921
struct btf_type *fn_t, *fn_proto_t, *t;
6922
struct btf_param *p;
6923
6924
/* caller already validated FUNC -> FUNC_PROTO validity */
6925
fn_t = btf_type_by_id(btf, orig_fn_id);
6926
fn_proto_t = btf_type_by_id(btf, fn_t->type);
6927
6928
/* Note that each btf__add_xxx() operation invalidates
6929
* all btf_type and string pointers, so we need to be
6930
* very careful when cloning BTF types. BTF type
6931
* pointers always have to be refetched. And to avoid
6932
* problems with invalidated string pointers, we
6933
* add empty strings initially, then just fix up
6934
* name_off offsets in place. Offsets are stable for
6935
* existing strings, so that works out.
6936
*/
6937
fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */
6938
linkage = btf_func_linkage(fn_t);
6939
orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
6940
ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */
6941
arg_cnt = btf_vlen(fn_proto_t);
6942
6943
/* clone FUNC_PROTO and its params */
6944
fn_proto_id = btf__add_func_proto(btf, ret_type_id);
6945
if (fn_proto_id < 0)
6946
return -EINVAL;
6947
6948
for (i = 0; i < arg_cnt; i++) {
6949
int name_off;
6950
6951
/* copy original parameter data */
6952
t = btf_type_by_id(btf, orig_proto_id);
6953
p = &btf_params(t)[i];
6954
name_off = p->name_off;
6955
6956
err = btf__add_func_param(btf, "", p->type);
6957
if (err)
6958
return err;
6959
6960
fn_proto_t = btf_type_by_id(btf, fn_proto_id);
6961
p = &btf_params(fn_proto_t)[i];
6962
p->name_off = name_off; /* use remembered str offset */
6963
}
6964
6965
/* clone FUNC now, btf__add_func() enforces non-empty name, so use
6966
* entry program's name as a placeholder, which we replace immediately
6967
* with original name_off
6968
*/
6969
fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id);
6970
if (fn_id < 0)
6971
return -EINVAL;
6972
6973
fn_t = btf_type_by_id(btf, fn_id);
6974
fn_t->name_off = fn_name_off; /* reuse original string */
6975
6976
return fn_id;
6977
}
6978
6979
/* Check if main program or global subprog's function prototype has `arg:ctx`
6980
* argument tags, and, if necessary, substitute correct type to match what BPF
6981
* verifier would expect, taking into account specific program type. This
6982
* allows us to support the __arg_ctx tag transparently on old kernels that don't yet
6983
* have native support for it in the verifier, making the user's life much
6984
* easier.
6985
*/
6986
static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
6987
{
6988
const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name;
6989
struct bpf_func_info_min *func_rec;
6990
struct btf_type *fn_t, *fn_proto_t;
6991
struct btf *btf = obj->btf;
6992
const struct btf_type *t;
6993
struct btf_param *p;
6994
int ptr_id = 0, struct_id, tag_id, orig_fn_id;
6995
int i, n, arg_idx, arg_cnt, err, rec_idx;
6996
int *orig_ids;
6997
6998
/* no .BTF.ext, no problem */
6999
if (!obj->btf_ext || !prog->func_info)
7000
return 0;
7001
7002
/* don't do any fix ups if kernel natively supports __arg_ctx */
7003
if (kernel_supports(obj, FEAT_ARG_CTX_TAG))
7004
return 0;
7005
7006
/* some BPF program types just don't have named context structs, so
7007
* this fallback mechanism doesn't work for them
7008
*/
7009
for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
7010
if (global_ctx_map[i].prog_type != prog->type)
7011
continue;
7012
ctx_name = global_ctx_map[i].ctx_name;
7013
break;
7014
}
7015
if (!ctx_name)
7016
return 0;
7017
7018
/* remember original func BTF IDs to detect if we already cloned them */
7019
orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
7020
if (!orig_ids)
7021
return -ENOMEM;
7022
for (i = 0; i < prog->func_info_cnt; i++) {
7023
func_rec = prog->func_info + prog->func_info_rec_size * i;
7024
orig_ids[i] = func_rec->type_id;
7025
}
7026
7027
/* go through each DECL_TAG with "arg:ctx" and see if it points to one
7028
* of our subprogs; if yes and subprog is global and needs adjustment,
7029
* clone and adjust FUNC -> FUNC_PROTO combo
7030
*/
7031
for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
7032
/* only DECL_TAG with "arg:ctx" value are interesting */
7033
t = btf__type_by_id(btf, i);
7034
if (!btf_is_decl_tag(t))
7035
continue;
7036
if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
7037
continue;
7038
7039
/* only global funcs need adjustment, if at all */
7040
orig_fn_id = t->type;
7041
fn_t = btf_type_by_id(btf, orig_fn_id);
7042
if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
7043
continue;
7044
7045
/* sanity check FUNC -> FUNC_PROTO chain, just in case */
7046
fn_proto_t = btf_type_by_id(btf, fn_t->type);
7047
if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
7048
continue;
7049
7050
/* find corresponding func_info record */
7051
func_rec = NULL;
7052
for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
7053
if (orig_ids[rec_idx] == t->type) {
7054
func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
7055
break;
7056
}
7057
}
7058
/* current main program doesn't call into this subprog */
7059
if (!func_rec)
7060
continue;
7061
7062
/* some more sanity checking of DECL_TAG */
7063
arg_cnt = btf_vlen(fn_proto_t);
7064
arg_idx = btf_decl_tag(t)->component_idx;
7065
if (arg_idx < 0 || arg_idx >= arg_cnt)
7066
continue;
7067
7068
/* check if we should fix up argument type */
7069
p = &btf_params(fn_proto_t)[arg_idx];
7070
fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>";
7071
if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name))
7072
continue;
7073
7074
/* clone fn/fn_proto, unless we already did it for another arg */
7075
if (func_rec->type_id == orig_fn_id) {
7076
int fn_id;
7077
7078
fn_id = clone_func_btf_info(btf, orig_fn_id, prog);
7079
if (fn_id < 0) {
7080
err = fn_id;
7081
goto err_out;
7082
}
7083
7084
/* point func_info record to a cloned FUNC type */
7085
func_rec->type_id = fn_id;
7086
}
7087
7088
/* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
7089
* we do it just once per main BPF program, as all global
7090
* funcs share the same program type, so we need only one PTR ->
7091
* STRUCT type chain
7092
*/
7093
if (ptr_id == 0) {
7094
struct_id = btf__add_struct(btf, ctx_name, 0);
7095
ptr_id = btf__add_ptr(btf, struct_id);
7096
if (ptr_id < 0 || struct_id < 0) {
7097
err = -EINVAL;
7098
goto err_out;
7099
}
7100
}
7101
7102
/* for completeness, clone DECL_TAG and point it to cloned param */
7103
tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
7104
if (tag_id < 0) {
7105
err = -EINVAL;
7106
goto err_out;
7107
}
7108
7109
/* all the BTF manipulations invalidated pointers, refetch them */
7110
fn_t = btf_type_by_id(btf, func_rec->type_id);
7111
fn_proto_t = btf_type_by_id(btf, fn_t->type);
7112
7113
/* fix up type ID pointed to by param */
7114
p = &btf_params(fn_proto_t)[arg_idx];
7115
p->type = ptr_id;
7116
}
7117
7118
free(orig_ids);
7119
return 0;
7120
err_out:
7121
free(orig_ids);
7122
return err;
7123
}
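
/* Illustrative sketch (assumed BPF-side usage, names are hypothetical): with
 * a global subprog annotated via the "arg:ctx" decl tag, e.g.
 *
 *	__noinline int handle(void *ctx __arg_ctx)
 *	{
 *		...
 *	}
 *
 * called from an SEC("xdp") program, the fixup above clones the subprog's
 * FUNC/FUNC_PROTO BTF and rewrites the argument type to 'struct xdp_md *',
 * which is what the verifier expects on kernels without native __arg_ctx
 * support.
 */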
7124
7125
static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
7126
{
7127
struct bpf_program *prog;
7128
size_t i, j;
7129
int err;
7130
7131
if (obj->btf_ext) {
7132
err = bpf_object__relocate_core(obj, targ_btf_path);
7133
if (err) {
7134
pr_warn("failed to perform CO-RE relocations: %s\n",
7135
errstr(err));
7136
return err;
7137
}
7138
bpf_object__sort_relos(obj);
7139
}
7140
7141
/* Before relocating calls, pre-process relocations and mark
7142
* the few ld_imm64 instructions that point to subprogs.
7143
* Otherwise bpf_object__reloc_code() later would have to consider
7144
* all ld_imm64 insns as relocation candidates. That would
7145
* reduce relocation speed, since the number of find_prog_insn_relo()
7146
* calls would increase and most of them would fail to find a relo.
7147
*/
7148
for (i = 0; i < obj->nr_programs; i++) {
7149
prog = &obj->programs[i];
7150
for (j = 0; j < prog->nr_reloc; j++) {
7151
struct reloc_desc *relo = &prog->reloc_desc[j];
7152
struct bpf_insn *insn = &prog->insns[relo->insn_idx];
7153
7154
/* mark the insn, so it's recognized by insn_is_pseudo_func() */
7155
if (relo->type == RELO_SUBPROG_ADDR)
7156
insn[0].src_reg = BPF_PSEUDO_FUNC;
7157
}
7158
}
7159
7160
/* relocate subprogram calls and append used subprograms to main
7161
* programs; each copy of subprogram code needs to be relocated
7162
* differently for each main program, because its code location might
7163
* have changed.
7164
* Append subprog relos to main programs to allow data relos to be
7165
* processed after text is completely relocated.
7166
*/
7167
for (i = 0; i < obj->nr_programs; i++) {
7168
prog = &obj->programs[i];
7169
/* sub-program's sub-calls are relocated within the context of
7170
* its main program only
7171
*/
7172
if (prog_is_subprog(obj, prog))
7173
continue;
7174
if (!prog->autoload)
7175
continue;
7176
7177
err = bpf_object__relocate_calls(obj, prog);
7178
if (err) {
7179
pr_warn("prog '%s': failed to relocate calls: %s\n",
7180
prog->name, errstr(err));
7181
return err;
7182
}
7183
7184
err = bpf_prog_assign_exc_cb(obj, prog);
7185
if (err)
7186
return err;
7187
/* Now, also append exception callback if it has not been done already. */
7188
if (prog->exception_cb_idx >= 0) {
7189
struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
7190
7191
/* Calling the exception callback directly is disallowed; the
7192
* verifier will reject that later. In case it was processed already,
7193
* we can skip this step, otherwise for all other valid cases we
7194
* have to append exception callback now.
7195
*/
7196
if (subprog->sub_insn_off == 0) {
7197
err = bpf_object__append_subprog_code(obj, prog, subprog);
7198
if (err)
7199
return err;
7200
err = bpf_object__reloc_code(obj, prog, subprog);
7201
if (err)
7202
return err;
7203
}
7204
}
7205
}
7206
for (i = 0; i < obj->nr_programs; i++) {
7207
prog = &obj->programs[i];
7208
if (prog_is_subprog(obj, prog))
7209
continue;
7210
if (!prog->autoload)
7211
continue;
7212
7213
/* Process data relos for main programs */
7214
err = bpf_object__relocate_data(obj, prog);
7215
if (err) {
7216
pr_warn("prog '%s': failed to relocate data references: %s\n",
7217
prog->name, errstr(err));
7218
return err;
7219
}
7220
7221
/* Fix up .BTF.ext information, if necessary */
7222
err = bpf_program_fixup_func_info(obj, prog);
7223
if (err) {
7224
pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %s\n",
7225
prog->name, errstr(err));
7226
return err;
7227
}
7228
}
7229
7230
return 0;
7231
}
7232
7233
static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
7234
Elf64_Shdr *shdr, Elf_Data *data);
7235
7236
static int bpf_object__collect_map_relos(struct bpf_object *obj,
7237
Elf64_Shdr *shdr, Elf_Data *data)
7238
{
7239
const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
7240
int i, j, nrels, new_sz;
7241
const struct btf_var_secinfo *vi = NULL;
7242
const struct btf_type *sec, *var, *def;
7243
struct bpf_map *map = NULL, *targ_map = NULL;
7244
struct bpf_program *targ_prog = NULL;
7245
bool is_prog_array, is_map_in_map;
7246
const struct btf_member *member;
7247
const char *name, *mname, *type;
7248
unsigned int moff;
7249
Elf64_Sym *sym;
7250
Elf64_Rel *rel;
7251
void *tmp;
7252
7253
if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
7254
return -EINVAL;
7255
sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
7256
if (!sec)
7257
return -EINVAL;
7258
7259
nrels = shdr->sh_size / shdr->sh_entsize;
7260
for (i = 0; i < nrels; i++) {
7261
rel = elf_rel_by_idx(data, i);
7262
if (!rel) {
7263
pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
7264
return -LIBBPF_ERRNO__FORMAT;
7265
}
7266
7267
sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
7268
if (!sym) {
7269
pr_warn(".maps relo #%d: symbol %zx not found\n",
7270
i, (size_t)ELF64_R_SYM(rel->r_info));
7271
return -LIBBPF_ERRNO__FORMAT;
7272
}
7273
name = elf_sym_str(obj, sym->st_name) ?: "<?>";
7274
7275
pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
7276
i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
7277
(size_t)rel->r_offset, sym->st_name, name);
7278
7279
for (j = 0; j < obj->nr_maps; j++) {
7280
map = &obj->maps[j];
7281
if (map->sec_idx != obj->efile.btf_maps_shndx)
7282
continue;
7283
7284
vi = btf_var_secinfos(sec) + map->btf_var_idx;
7285
if (vi->offset <= rel->r_offset &&
7286
rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
7287
break;
7288
}
7289
if (j == obj->nr_maps) {
7290
pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
7291
i, name, (size_t)rel->r_offset);
7292
return -EINVAL;
7293
}
7294
7295
is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
7296
is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
7297
type = is_map_in_map ? "map" : "prog";
7298
if (is_map_in_map) {
7299
if (sym->st_shndx != obj->efile.btf_maps_shndx) {
7300
pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
7301
i, name);
7302
return -LIBBPF_ERRNO__RELOC;
7303
}
7304
if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
7305
map->def.key_size != sizeof(int)) {
7306
pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
7307
i, map->name, sizeof(int));
7308
return -EINVAL;
7309
}
7310
targ_map = bpf_object__find_map_by_name(obj, name);
7311
if (!targ_map) {
7312
pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
7313
i, name);
7314
return -ESRCH;
7315
}
7316
} else if (is_prog_array) {
7317
targ_prog = bpf_object__find_program_by_name(obj, name);
7318
if (!targ_prog) {
7319
pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
7320
i, name);
7321
return -ESRCH;
7322
}
7323
if (targ_prog->sec_idx != sym->st_shndx ||
7324
targ_prog->sec_insn_off * 8 != sym->st_value ||
7325
prog_is_subprog(obj, targ_prog)) {
7326
pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
7327
i, name);
7328
return -LIBBPF_ERRNO__RELOC;
7329
}
7330
} else {
7331
return -EINVAL;
7332
}
7333
7334
var = btf__type_by_id(obj->btf, vi->type);
7335
def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
7336
if (btf_vlen(def) == 0)
7337
return -EINVAL;
7338
member = btf_members(def) + btf_vlen(def) - 1;
7339
mname = btf__name_by_offset(obj->btf, member->name_off);
7340
if (strcmp(mname, "values"))
7341
return -EINVAL;
7342
7343
moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
7344
if (rel->r_offset - vi->offset < moff)
7345
return -EINVAL;
7346
7347
moff = rel->r_offset - vi->offset - moff;
7348
/* here we use BPF pointer size, which is always 64 bit, as we
7349
* are parsing an ELF that was built for the BPF target
7350
*/
7351
if (moff % bpf_ptr_sz)
7352
return -EINVAL;
7353
moff /= bpf_ptr_sz;
7354
if (moff >= map->init_slots_sz) {
7355
new_sz = moff + 1;
7356
tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
7357
if (!tmp)
7358
return -ENOMEM;
7359
map->init_slots = tmp;
7360
memset(map->init_slots + map->init_slots_sz, 0,
7361
(new_sz - map->init_slots_sz) * host_ptr_sz);
7362
map->init_slots_sz = new_sz;
7363
}
7364
map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
7365
7366
pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
7367
i, map->name, moff, type, name);
7368
}
7369
7370
return 0;
7371
}
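/* Illustrative sketch (not part of libbpf): the relocations resolved above are
* emitted for BTF-defined maps whose "values" member is statically initialized
* with references to other maps (map-in-map) or to programs (PROG_ARRAY). On
* the BPF side, assuming the usual bpf_helpers.h macros, that typically looks
* like:
*
*	struct inner_map {
*		__uint(type, BPF_MAP_TYPE_ARRAY);
*		__uint(max_entries, 1);
*		__type(key, int);
*		__type(value, int);
*	} inner_map1 SEC(".maps");
*
*	struct {
*		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
*		__uint(max_entries, 4);
*		__type(key, int);
*		__array(values, struct inner_map);
*	} outer_map SEC(".maps") = {
*		.values = { [0] = &inner_map1 },
*	};
*
* Each initialized slot produces one relocation against the .maps section,
* which the loop above turns into a map->init_slots[] entry.
*/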
7372
7373
static int bpf_object__collect_relos(struct bpf_object *obj)
7374
{
7375
int i, err;
7376
7377
for (i = 0; i < obj->efile.sec_cnt; i++) {
7378
struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
7379
Elf64_Shdr *shdr;
7380
Elf_Data *data;
7381
int idx;
7382
7383
if (sec_desc->sec_type != SEC_RELO)
7384
continue;
7385
7386
shdr = sec_desc->shdr;
7387
data = sec_desc->data;
7388
idx = shdr->sh_info;
7389
7390
if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) {
7391
pr_warn("internal error at %d\n", __LINE__);
7392
return -LIBBPF_ERRNO__INTERNAL;
7393
}
7394
7395
if (obj->efile.secs[idx].sec_type == SEC_ST_OPS)
7396
err = bpf_object__collect_st_ops_relos(obj, shdr, data);
7397
else if (idx == obj->efile.btf_maps_shndx)
7398
err = bpf_object__collect_map_relos(obj, shdr, data);
7399
else
7400
err = bpf_object__collect_prog_relos(obj, shdr, data);
7401
if (err)
7402
return err;
7403
}
7404
7405
bpf_object__sort_relos(obj);
7406
return 0;
7407
}
7408
7409
static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
7410
{
7411
if (BPF_CLASS(insn->code) == BPF_JMP &&
7412
BPF_OP(insn->code) == BPF_CALL &&
7413
BPF_SRC(insn->code) == BPF_K &&
7414
insn->src_reg == 0 &&
7415
insn->dst_reg == 0) {
7416
*func_id = insn->imm;
7417
return true;
7418
}
7419
return false;
7420
}
7421
7422
static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
7423
{
7424
struct bpf_insn *insn = prog->insns;
7425
enum bpf_func_id func_id;
7426
int i;
7427
7428
if (obj->gen_loader)
7429
return 0;
7430
7431
for (i = 0; i < prog->insns_cnt; i++, insn++) {
7432
if (!insn_is_helper_call(insn, &func_id))
7433
continue;
7434
7435
/* on kernels that don't yet support
7436
* bpf_probe_read_{kernel,user}[_str] helpers, fall back
7437
* to bpf_probe_read() which works well for old kernels
7438
*/
7439
switch (func_id) {
7440
case BPF_FUNC_probe_read_kernel:
7441
case BPF_FUNC_probe_read_user:
7442
if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7443
insn->imm = BPF_FUNC_probe_read;
7444
break;
7445
case BPF_FUNC_probe_read_kernel_str:
7446
case BPF_FUNC_probe_read_user_str:
7447
if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7448
insn->imm = BPF_FUNC_probe_read_str;
7449
break;
7450
default:
7451
break;
7452
}
7453
}
7454
return 0;
7455
}
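/* Note (not part of libbpf): thanks to the sanitization above, BPF-side code
* can use the newer helpers unconditionally; on kernels that lack them (as
* probed via FEAT_PROBE_READ_KERN) the call is rewritten in place, e.g.:
*
*	int val;
*
*	bpf_probe_read_kernel(&val, sizeof(val), kernel_ptr);
*	// becomes a bpf_probe_read() call on kernels without the newer helper
*/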
7456
7457
static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
7458
int *btf_obj_fd, int *btf_type_id);
7459
7460
/* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
7461
static int libbpf_prepare_prog_load(struct bpf_program *prog,
7462
struct bpf_prog_load_opts *opts, long cookie)
7463
{
7464
enum sec_def_flags def = cookie;
7465
7466
/* old kernels might not support specifying expected_attach_type */
7467
if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
7468
opts->expected_attach_type = 0;
7469
7470
if (def & SEC_SLEEPABLE)
7471
opts->prog_flags |= BPF_F_SLEEPABLE;
7472
7473
if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
7474
opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
7475
7476
/* special check for usdt to use uprobe_multi link */
7477
if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) {
7478
/* for BPF_TRACE_UPROBE_MULTI, user might want to query expected_attach_type
7479
* in prog, and expected_attach_type we set in kernel is from opts, so we
7480
* update both.
7481
*/
7482
prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7483
opts->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7484
}
7485
7486
if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
7487
int btf_obj_fd = 0, btf_type_id = 0, err;
7488
const char *attach_name;
7489
7490
attach_name = strchr(prog->sec_name, '/');
7491
if (!attach_name) {
7492
/* if BPF program is annotated with just SEC("fentry")
7493
* (or similar) without declaratively specifying
7494
* target, then it is expected that target will be
7495
* specified with bpf_program__set_attach_target() at
7496
* runtime before BPF object load step. If not, then
7497
* there is nothing to load into the kernel as BPF
7498
* verifier won't be able to validate BPF program
7499
* correctness anyways.
7500
*/
7501
pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
7502
prog->name);
7503
return -EINVAL;
7504
}
7505
attach_name++; /* skip over / */
7506
7507
err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
7508
if (err)
7509
return err;
7510
7511
/* cache resolved BTF FD and BTF type ID in the prog */
7512
prog->attach_btf_obj_fd = btf_obj_fd;
7513
prog->attach_btf_id = btf_type_id;
7514
7515
/* but by now libbpf common logic is not utilizing
7516
* prog->attach_btf_obj_fd/prog->attach_btf_id anymore because
7517
* this callback is called after opts were populated by
7518
* libbpf, so this callback has to update opts explicitly here
7519
*/
7520
opts->attach_btf_obj_fd = btf_obj_fd;
7521
opts->attach_btf_id = btf_type_id;
7522
}
7523
return 0;
7524
}
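/* Illustrative sketch (not part of libbpf): the SEC_ATTACH_BTF handling above
* corresponds to the two ways a BTF-based program can name its attach target.
* Either declaratively in the section name:
*
*	SEC("fentry/do_unlinkat")
*	int BPF_PROG(trace_unlinkat, int dfd, struct filename *name) { return 0; }
*
* or programmatically before load, when SEC("fentry") names no target:
*
*	prog = bpf_object__find_program_by_name(obj, "trace_unlinkat");
*	err = bpf_program__set_attach_target(prog, 0, "do_unlinkat"); // 0 == vmlinux
*/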
7525
7526
static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
7527
7528
static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
7529
struct bpf_insn *insns, int insns_cnt,
7530
const char *license, __u32 kern_version, int *prog_fd)
7531
{
7532
LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
7533
const char *prog_name = NULL;
7534
size_t log_buf_size = 0;
7535
char *log_buf = NULL, *tmp;
7536
bool own_log_buf = true;
7537
__u32 log_level = prog->log_level;
7538
int ret, err;
7539
7540
/* Be more helpful by rejecting programs that can't be validated early
7541
* with a more meaningful and actionable error message.
7542
*/
7543
switch (prog->type) {
7544
case BPF_PROG_TYPE_UNSPEC:
7545
/*
7546
* The program type must be set. Most likely we couldn't find a proper
7547
* section definition at load time, and thus we didn't infer the type.
7548
*/
7549
pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
7550
prog->name, prog->sec_name);
7551
return -EINVAL;
7552
case BPF_PROG_TYPE_STRUCT_OPS:
7553
if (prog->attach_btf_id == 0) {
7554
pr_warn("prog '%s': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?\n",
7555
prog->name);
7556
return -EINVAL;
7557
}
7558
break;
7559
default:
7560
break;
7561
}
7562
7563
if (!insns || !insns_cnt)
7564
return -EINVAL;
7565
7566
if (kernel_supports(obj, FEAT_PROG_NAME))
7567
prog_name = prog->name;
7568
load_attr.attach_prog_fd = prog->attach_prog_fd;
7569
load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
7570
load_attr.attach_btf_id = prog->attach_btf_id;
7571
load_attr.kern_version = kern_version;
7572
load_attr.prog_ifindex = prog->prog_ifindex;
7573
load_attr.expected_attach_type = prog->expected_attach_type;
7574
7575
/* specify func_info/line_info only if kernel supports them */
7576
if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
7577
load_attr.prog_btf_fd = btf__fd(obj->btf);
7578
load_attr.func_info = prog->func_info;
7579
load_attr.func_info_rec_size = prog->func_info_rec_size;
7580
load_attr.func_info_cnt = prog->func_info_cnt;
7581
load_attr.line_info = prog->line_info;
7582
load_attr.line_info_rec_size = prog->line_info_rec_size;
7583
load_attr.line_info_cnt = prog->line_info_cnt;
7584
}
7585
load_attr.log_level = log_level;
7586
load_attr.prog_flags = prog->prog_flags;
7587
load_attr.fd_array = obj->fd_array;
7588
7589
load_attr.token_fd = obj->token_fd;
7590
if (obj->token_fd)
7591
load_attr.prog_flags |= BPF_F_TOKEN_FD;
7592
7593
/* adjust load_attr if sec_def provides custom preload callback */
7594
if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
7595
err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
7596
if (err < 0) {
7597
pr_warn("prog '%s': failed to prepare load attributes: %s\n",
7598
prog->name, errstr(err));
7599
return err;
7600
}
7601
insns = prog->insns;
7602
insns_cnt = prog->insns_cnt;
7603
}
7604
7605
if (obj->gen_loader) {
7606
bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
7607
license, insns, insns_cnt, &load_attr,
7608
prog - obj->programs);
7609
*prog_fd = -1;
7610
return 0;
7611
}
7612
7613
retry_load:
7614
/* if log_level is zero, we don't request logs initially even if
7615
* custom log_buf is specified; if the program load fails, then we'll
7616
* bump log_level to 1 and use either custom log_buf or we'll allocate
7617
* our own and retry the load to get details on what failed
7618
*/
7619
if (log_level) {
7620
if (prog->log_buf) {
7621
log_buf = prog->log_buf;
7622
log_buf_size = prog->log_size;
7623
own_log_buf = false;
7624
} else if (obj->log_buf) {
7625
log_buf = obj->log_buf;
7626
log_buf_size = obj->log_size;
7627
own_log_buf = false;
7628
} else {
7629
log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
7630
tmp = realloc(log_buf, log_buf_size);
7631
if (!tmp) {
7632
ret = -ENOMEM;
7633
goto out;
7634
}
7635
log_buf = tmp;
7636
log_buf[0] = '\0';
7637
own_log_buf = true;
7638
}
7639
}
7640
7641
load_attr.log_buf = log_buf;
7642
load_attr.log_size = log_buf_size;
7643
load_attr.log_level = log_level;
7644
7645
ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
7646
if (ret >= 0) {
7647
if (log_level && own_log_buf) {
7648
pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7649
prog->name, log_buf);
7650
}
7651
7652
if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
7653
struct bpf_map *map;
7654
int i;
7655
7656
for (i = 0; i < obj->nr_maps; i++) {
7657
map = &prog->obj->maps[i];
7658
if (map->libbpf_type != LIBBPF_MAP_RODATA)
7659
continue;
7660
7661
if (bpf_prog_bind_map(ret, map->fd, NULL)) {
7662
pr_warn("prog '%s': failed to bind map '%s': %s\n",
7663
prog->name, map->real_name, errstr(errno));
7664
/* Don't fail hard if can't bind rodata. */
7665
}
7666
}
7667
}
7668
7669
*prog_fd = ret;
7670
ret = 0;
7671
goto out;
7672
}
7673
7674
if (log_level == 0) {
7675
log_level = 1;
7676
goto retry_load;
7677
}
7678
/* On ENOSPC, increase log buffer size and retry, unless custom
7679
* log_buf is specified.
7680
* Be careful to not overflow u32, though. Kernel's log buf size limit
7681
* isn't part of UAPI so it can always be bumped to full 4GB. So don't
7682
* multiply by 2 unless we are sure we'll fit within 32 bits.
7683
* Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
7684
*/
7685
if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
7686
goto retry_load;
7687
7688
ret = -errno;
7689
7690
/* post-process verifier log to improve error descriptions */
7691
fixup_verifier_log(prog, log_buf, log_buf_size);
7692
7693
pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, errstr(errno));
7694
pr_perm_msg(ret);
7695
7696
if (own_log_buf && log_buf && log_buf[0] != '\0') {
7697
pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7698
prog->name, log_buf);
7699
}
7700
7701
out:
7702
if (own_log_buf)
7703
free(log_buf);
7704
return ret;
7705
}
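/* Illustrative sketch (not part of libbpf): callers can avoid the retry and
* reallocation dance above by supplying their own verifier log buffer and log
* level before load, either per-program or per-object (kernel_log_buf,
* kernel_log_size and kernel_log_level in bpf_object_open_opts):
*
*	static char log[1024 * 1024];
*
*	bpf_program__set_log_buf(prog, log, sizeof(log));
*	bpf_program__set_log_level(prog, 1);
*/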
7706
7707
static char *find_prev_line(char *buf, char *cur)
7708
{
7709
char *p;
7710
7711
if (cur == buf) /* end of a log buf */
7712
return NULL;
7713
7714
p = cur - 1;
7715
while (p - 1 >= buf && *(p - 1) != '\n')
7716
p--;
7717
7718
return p;
7719
}
7720
7721
static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
7722
char *orig, size_t orig_sz, const char *patch)
7723
{
7724
/* size of the remaining log content to the right from the to-be-replaced part */
7725
size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
7726
size_t patch_sz = strlen(patch);
7727
7728
if (patch_sz != orig_sz) {
7729
/* If patch line(s) are longer than original piece of verifier log,
7730
* shift log contents by (patch_sz - orig_sz) bytes to the right
7731
* starting from after to-be-replaced part of the log.
7732
*
7733
* If patch line(s) are shorter than original piece of verifier log,
7734
* shift log contents by (orig_sz - patch_sz) bytes to the left
7735
* starting from after to-be-replaced part of the log
7736
*
7737
* We need to be careful about not overflowing available
7738
* buf_sz capacity. If that's the case, we'll truncate the end
7739
* of the original log, as necessary.
7740
*/
7741
if (patch_sz > orig_sz) {
7742
if (orig + patch_sz >= buf + buf_sz) {
7743
/* patch is big enough to cover remaining space completely */
7744
patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
7745
rem_sz = 0;
7746
} else if (patch_sz - orig_sz > buf_sz - log_sz) {
7747
/* patch causes part of remaining log to be truncated */
7748
rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
7749
}
7750
}
7751
/* shift remaining log to the right by calculated amount */
7752
memmove(orig + patch_sz, orig + orig_sz, rem_sz);
7753
}
7754
7755
memcpy(orig, patch, patch_sz);
7756
}
7757
7758
static void fixup_log_failed_core_relo(struct bpf_program *prog,
7759
char *buf, size_t buf_sz, size_t log_sz,
7760
char *line1, char *line2, char *line3)
7761
{
7762
/* Expected log for failed and not properly guarded CO-RE relocation:
7763
* line1 -> 123: (85) call unknown#195896080
7764
* line2 -> invalid func unknown#195896080
7765
* line3 -> <anything else or end of buffer>
7766
*
7767
* "123" is the index of the instruction that was poisoned. We extract
7768
* instruction index to find corresponding CO-RE relocation and
7769
* replace this part of the log with more relevant information about
7770
* failed CO-RE relocation.
7771
*/
7772
const struct bpf_core_relo *relo;
7773
struct bpf_core_spec spec;
7774
char patch[512], spec_buf[256];
7775
int insn_idx, err, spec_len;
7776
7777
if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
7778
return;
7779
7780
relo = find_relo_core(prog, insn_idx);
7781
if (!relo)
7782
return;
7783
7784
err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
7785
if (err)
7786
return;
7787
7788
spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
7789
snprintf(patch, sizeof(patch),
7790
"%d: <invalid CO-RE relocation>\n"
7791
"failed to resolve CO-RE relocation %s%s\n",
7792
insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
7793
7794
patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7795
}
7796
7797
static void fixup_log_missing_map_load(struct bpf_program *prog,
7798
char *buf, size_t buf_sz, size_t log_sz,
7799
char *line1, char *line2, char *line3)
7800
{
7801
/* Expected log for failed and not properly guarded map reference:
7802
* line1 -> 123: (85) call unknown#2001000345
7803
* line2 -> invalid func unknown#2001000345
7804
* line3 -> <anything else or end of buffer>
7805
*
7806
* "123" is the index of the instruction that was poisoned.
7807
* "345" in "2001000345" is a map index in obj->maps to fetch map name.
7808
*/
7809
struct bpf_object *obj = prog->obj;
7810
const struct bpf_map *map;
7811
int insn_idx, map_idx;
7812
char patch[128];
7813
7814
if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
7815
return;
7816
7817
map_idx -= POISON_LDIMM64_MAP_BASE;
7818
if (map_idx < 0 || map_idx >= obj->nr_maps)
7819
return;
7820
map = &obj->maps[map_idx];
7821
7822
snprintf(patch, sizeof(patch),
7823
"%d: <invalid BPF map reference>\n"
7824
"BPF map '%s' is referenced but wasn't created\n",
7825
insn_idx, map->name);
7826
7827
patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7828
}
7829
7830
static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
7831
char *buf, size_t buf_sz, size_t log_sz,
7832
char *line1, char *line2, char *line3)
7833
{
7834
/* Expected log for failed and not properly guarded kfunc call:
7835
* line1 -> 123: (85) call unknown#2002000345
7836
* line2 -> invalid func unknown#2002000345
7837
* line3 -> <anything else or end of buffer>
7838
*
7839
* "123" is the index of the instruction that was poisoned.
7840
* "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
7841
*/
7842
struct bpf_object *obj = prog->obj;
7843
const struct extern_desc *ext;
7844
int insn_idx, ext_idx;
7845
char patch[128];
7846
7847
if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
7848
return;
7849
7850
ext_idx -= POISON_CALL_KFUNC_BASE;
7851
if (ext_idx < 0 || ext_idx >= obj->nr_extern)
7852
return;
7853
ext = &obj->externs[ext_idx];
7854
7855
snprintf(patch, sizeof(patch),
7856
"%d: <invalid kfunc call>\n"
7857
"kfunc '%s' is referenced but wasn't resolved\n",
7858
insn_idx, ext->name);
7859
7860
patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7861
}
7862
7863
static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
7864
{
7865
/* look for familiar error patterns in last N lines of the log */
7866
const size_t max_last_line_cnt = 10;
7867
char *prev_line, *cur_line, *next_line;
7868
size_t log_sz;
7869
int i;
7870
7871
if (!buf)
7872
return;
7873
7874
log_sz = strlen(buf) + 1;
7875
next_line = buf + log_sz - 1;
7876
7877
for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
7878
cur_line = find_prev_line(buf, next_line);
7879
if (!cur_line)
7880
return;
7881
7882
if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
7883
prev_line = find_prev_line(buf, cur_line);
7884
if (!prev_line)
7885
continue;
7886
7887
/* failed CO-RE relocation case */
7888
fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
7889
prev_line, cur_line, next_line);
7890
return;
7891
} else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
7892
prev_line = find_prev_line(buf, cur_line);
7893
if (!prev_line)
7894
continue;
7895
7896
/* reference to uncreated BPF map */
7897
fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
7898
prev_line, cur_line, next_line);
7899
return;
7900
} else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
7901
prev_line = find_prev_line(buf, cur_line);
7902
if (!prev_line)
7903
continue;
7904
7905
/* reference to unresolved kfunc */
7906
fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
7907
prev_line, cur_line, next_line);
7908
return;
7909
}
7910
}
7911
}
7912
7913
static int bpf_program_record_relos(struct bpf_program *prog)
7914
{
7915
struct bpf_object *obj = prog->obj;
7916
int i;
7917
7918
for (i = 0; i < prog->nr_reloc; i++) {
7919
struct reloc_desc *relo = &prog->reloc_desc[i];
7920
struct extern_desc *ext = &obj->externs[relo->ext_idx];
7921
int kind;
7922
7923
switch (relo->type) {
7924
case RELO_EXTERN_LD64:
7925
if (ext->type != EXT_KSYM)
7926
continue;
7927
kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
7928
BTF_KIND_VAR : BTF_KIND_FUNC;
7929
bpf_gen__record_extern(obj->gen_loader, ext->name,
7930
ext->is_weak, !ext->ksym.type_id,
7931
true, kind, relo->insn_idx);
7932
break;
7933
case RELO_EXTERN_CALL:
7934
bpf_gen__record_extern(obj->gen_loader, ext->name,
7935
ext->is_weak, false, false, BTF_KIND_FUNC,
7936
relo->insn_idx);
7937
break;
7938
case RELO_CORE: {
7939
struct bpf_core_relo cr = {
7940
.insn_off = relo->insn_idx * 8,
7941
.type_id = relo->core_relo->type_id,
7942
.access_str_off = relo->core_relo->access_str_off,
7943
.kind = relo->core_relo->kind,
7944
};
7945
7946
bpf_gen__record_relo_core(obj->gen_loader, &cr);
7947
break;
7948
}
7949
default:
7950
continue;
7951
}
7952
}
7953
return 0;
7954
}
7955
7956
static int
7957
bpf_object__load_progs(struct bpf_object *obj, int log_level)
7958
{
7959
struct bpf_program *prog;
7960
size_t i;
7961
int err;
7962
7963
for (i = 0; i < obj->nr_programs; i++) {
7964
prog = &obj->programs[i];
7965
if (prog_is_subprog(obj, prog))
7966
continue;
7967
if (!prog->autoload) {
7968
pr_debug("prog '%s': skipped loading\n", prog->name);
7969
continue;
7970
}
7971
prog->log_level |= log_level;
7972
7973
if (obj->gen_loader)
7974
bpf_program_record_relos(prog);
7975
7976
err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
7977
obj->license, obj->kern_version, &prog->fd);
7978
if (err) {
7979
pr_warn("prog '%s': failed to load: %s\n", prog->name, errstr(err));
7980
return err;
7981
}
7982
}
7983
7984
bpf_object__free_relocs(obj);
7985
return 0;
7986
}
7987
7988
static int bpf_object_prepare_progs(struct bpf_object *obj)
7989
{
7990
struct bpf_program *prog;
7991
size_t i;
7992
int err;
7993
7994
for (i = 0; i < obj->nr_programs; i++) {
7995
prog = &obj->programs[i];
7996
err = bpf_object__sanitize_prog(obj, prog);
7997
if (err)
7998
return err;
7999
}
8000
return 0;
8001
}
8002
8003
static const struct bpf_sec_def *find_sec_def(const char *sec_name);
8004
8005
static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
8006
{
8007
struct bpf_program *prog;
8008
int err;
8009
8010
bpf_object__for_each_program(prog, obj) {
8011
prog->sec_def = find_sec_def(prog->sec_name);
8012
if (!prog->sec_def) {
8013
/* couldn't guess, but user might manually specify */
8014
pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
8015
prog->name, prog->sec_name);
8016
continue;
8017
}
8018
8019
prog->type = prog->sec_def->prog_type;
8020
prog->expected_attach_type = prog->sec_def->expected_attach_type;
8021
8022
/* sec_def can have custom callback which should be called
8023
* after bpf_program is initialized to adjust its properties
8024
*/
8025
if (prog->sec_def->prog_setup_fn) {
8026
err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
8027
if (err < 0) {
8028
pr_warn("prog '%s': failed to initialize: %s\n",
8029
prog->name, errstr(err));
8030
return err;
8031
}
8032
}
8033
}
8034
8035
return 0;
8036
}
8037
8038
static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
8039
const char *obj_name,
8040
const struct bpf_object_open_opts *opts)
8041
{
8042
const char *kconfig, *btf_tmp_path, *token_path;
8043
struct bpf_object *obj;
8044
int err;
8045
char *log_buf;
8046
size_t log_size;
8047
__u32 log_level;
8048
8049
if (obj_buf && !obj_name)
8050
return ERR_PTR(-EINVAL);
8051
8052
if (elf_version(EV_CURRENT) == EV_NONE) {
8053
pr_warn("failed to init libelf for %s\n",
8054
path ? : "(mem buf)");
8055
return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
8056
}
8057
8058
if (!OPTS_VALID(opts, bpf_object_open_opts))
8059
return ERR_PTR(-EINVAL);
8060
8061
obj_name = OPTS_GET(opts, object_name, NULL) ?: obj_name;
8062
if (obj_buf) {
8063
path = obj_name;
8064
pr_debug("loading object '%s' from buffer\n", obj_name);
8065
} else {
8066
pr_debug("loading object from %s\n", path);
8067
}
8068
8069
log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
8070
log_size = OPTS_GET(opts, kernel_log_size, 0);
8071
log_level = OPTS_GET(opts, kernel_log_level, 0);
8072
if (log_size > UINT_MAX)
8073
return ERR_PTR(-EINVAL);
8074
if (log_size && !log_buf)
8075
return ERR_PTR(-EINVAL);
8076
8077
token_path = OPTS_GET(opts, bpf_token_path, NULL);
8078
/* if user didn't specify bpf_token_path explicitly, check if
8079
* LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path
8080
* option
8081
*/
8082
if (!token_path)
8083
token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
8084
if (token_path && strlen(token_path) >= PATH_MAX)
8085
return ERR_PTR(-ENAMETOOLONG);
8086
8087
obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
8088
if (IS_ERR(obj))
8089
return obj;
8090
8091
obj->log_buf = log_buf;
8092
obj->log_size = log_size;
8093
obj->log_level = log_level;
8094
8095
if (token_path) {
8096
obj->token_path = strdup(token_path);
8097
if (!obj->token_path) {
8098
err = -ENOMEM;
8099
goto out;
8100
}
8101
}
8102
8103
btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
8104
if (btf_tmp_path) {
8105
if (strlen(btf_tmp_path) >= PATH_MAX) {
8106
err = -ENAMETOOLONG;
8107
goto out;
8108
}
8109
obj->btf_custom_path = strdup(btf_tmp_path);
8110
if (!obj->btf_custom_path) {
8111
err = -ENOMEM;
8112
goto out;
8113
}
8114
}
8115
8116
kconfig = OPTS_GET(opts, kconfig, NULL);
8117
if (kconfig) {
8118
obj->kconfig = strdup(kconfig);
8119
if (!obj->kconfig) {
8120
err = -ENOMEM;
8121
goto out;
8122
}
8123
}
8124
8125
err = bpf_object__elf_init(obj);
8126
err = err ? : bpf_object__elf_collect(obj);
8127
err = err ? : bpf_object__collect_externs(obj);
8128
err = err ? : bpf_object_fixup_btf(obj);
8129
err = err ? : bpf_object__init_maps(obj, opts);
8130
err = err ? : bpf_object_init_progs(obj, opts);
8131
err = err ? : bpf_object__collect_relos(obj);
8132
if (err)
8133
goto out;
8134
8135
bpf_object__elf_finish(obj);
8136
8137
return obj;
8138
out:
8139
bpf_object__close(obj);
8140
return ERR_PTR(err);
8141
}
8142
8143
struct bpf_object *
8144
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
8145
{
8146
if (!path)
8147
return libbpf_err_ptr(-EINVAL);
8148
8149
return libbpf_ptr(bpf_object_open(path, NULL, 0, NULL, opts));
8150
}
8151
8152
struct bpf_object *bpf_object__open(const char *path)
8153
{
8154
return bpf_object__open_file(path, NULL);
8155
}
8156
8157
struct bpf_object *
8158
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
8159
const struct bpf_object_open_opts *opts)
8160
{
8161
char tmp_name[64];
8162
8163
if (!obj_buf || obj_buf_sz == 0)
8164
return libbpf_err_ptr(-EINVAL);
8165
8166
/* create a (quite useless) default "name" for this memory buffer object */
8167
snprintf(tmp_name, sizeof(tmp_name), "%lx-%zx", (unsigned long)obj_buf, obj_buf_sz);
8168
8169
return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, tmp_name, opts));
8170
}
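/* Illustrative sketch (not part of libbpf): typical open/load usage of the
* APIs above; in libbpf 1.0 mode the open functions return NULL and set errno
* on failure:
*
*	struct bpf_object *obj;
*	int err;
*
*	obj = bpf_object__open_file("prog.bpf.o", NULL);
*	if (!obj)
*		return -errno;
*	err = bpf_object__load(obj);
*	if (err) {
*		bpf_object__close(obj);
*		return err;
*	}
*/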
8171
8172
static int bpf_object_unload(struct bpf_object *obj)
8173
{
8174
size_t i;
8175
8176
if (!obj)
8177
return libbpf_err(-EINVAL);
8178
8179
for (i = 0; i < obj->nr_maps; i++) {
8180
zclose(obj->maps[i].fd);
8181
if (obj->maps[i].st_ops)
8182
zfree(&obj->maps[i].st_ops->kern_vdata);
8183
}
8184
8185
for (i = 0; i < obj->nr_programs; i++)
8186
bpf_program__unload(&obj->programs[i]);
8187
8188
return 0;
8189
}
8190
8191
static int bpf_object__sanitize_maps(struct bpf_object *obj)
8192
{
8193
struct bpf_map *m;
8194
8195
bpf_object__for_each_map(m, obj) {
8196
if (!bpf_map__is_internal(m))
8197
continue;
8198
if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
8199
m->def.map_flags &= ~BPF_F_MMAPABLE;
8200
}
8201
8202
return 0;
8203
}
8204
8205
typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type,
8206
const char *sym_name, void *ctx);
8207
8208
static int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
8209
{
8210
char sym_type, sym_name[500];
8211
unsigned long long sym_addr;
8212
int ret, err = 0;
8213
FILE *f;
8214
8215
f = fopen("/proc/kallsyms", "re");
8216
if (!f) {
8217
err = -errno;
8218
pr_warn("failed to open /proc/kallsyms: %s\n", errstr(err));
8219
return err;
8220
}
8221
8222
while (true) {
8223
ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
8224
&sym_addr, &sym_type, sym_name);
8225
if (ret == EOF && feof(f))
8226
break;
8227
if (ret != 3) {
8228
pr_warn("failed to read kallsyms entry: %d\n", ret);
8229
err = -EINVAL;
8230
break;
8231
}
8232
8233
err = cb(sym_addr, sym_type, sym_name, ctx);
8234
if (err)
8235
break;
8236
}
8237
8238
fclose(f);
8239
return err;
8240
}
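/* Illustrative sketch (not part of libbpf): a kallsyms_cb_t callback receives
* one parsed /proc/kallsyms entry per invocation; a hypothetical callback that
* looks up a single symbol address could look like:
*
*	struct sym_query { const char *name; unsigned long long addr; };
*
*	static int find_sym_cb(unsigned long long sym_addr, char sym_type,
*			       const char *sym_name, void *ctx)
*	{
*		struct sym_query *q = ctx;
*
*		if (strcmp(sym_name, q->name) == 0)
*			q->addr = sym_addr;
*		return 0;	// a non-zero return stops the parse loop
*	}
*/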
8241
8242
static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
8243
const char *sym_name, void *ctx)
8244
{
8245
struct bpf_object *obj = ctx;
8246
const struct btf_type *t;
8247
struct extern_desc *ext;
8248
char *res;
8249
8250
res = strstr(sym_name, ".llvm.");
8251
if (sym_type == 'd' && res)
8252
ext = find_extern_by_name_with_len(obj, sym_name, res - sym_name);
8253
else
8254
ext = find_extern_by_name(obj, sym_name);
8255
if (!ext || ext->type != EXT_KSYM)
8256
return 0;
8257
8258
t = btf__type_by_id(obj->btf, ext->btf_id);
8259
if (!btf_is_var(t))
8260
return 0;
8261
8262
if (ext->is_set && ext->ksym.addr != sym_addr) {
8263
pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
8264
sym_name, ext->ksym.addr, sym_addr);
8265
return -EINVAL;
8266
}
8267
if (!ext->is_set) {
8268
ext->is_set = true;
8269
ext->ksym.addr = sym_addr;
8270
pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
8271
}
8272
return 0;
8273
}
8274
8275
static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
8276
{
8277
return libbpf_kallsyms_parse(kallsyms_cb, obj);
8278
}
8279
8280
static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
8281
__u16 kind, struct btf **res_btf,
8282
struct module_btf **res_mod_btf)
8283
{
8284
struct module_btf *mod_btf;
8285
struct btf *btf;
8286
int i, id, err;
8287
8288
btf = obj->btf_vmlinux;
8289
mod_btf = NULL;
8290
id = btf__find_by_name_kind(btf, ksym_name, kind);
8291
8292
if (id == -ENOENT) {
8293
err = load_module_btfs(obj);
8294
if (err)
8295
return err;
8296
8297
for (i = 0; i < obj->btf_module_cnt; i++) {
8298
/* we assume module_btf's BTF FD is always >0 */
8299
mod_btf = &obj->btf_modules[i];
8300
btf = mod_btf->btf;
8301
id = btf__find_by_name_kind_own(btf, ksym_name, kind);
8302
if (id != -ENOENT)
8303
break;
8304
}
8305
}
8306
if (id <= 0)
8307
return -ESRCH;
8308
8309
*res_btf = btf;
8310
*res_mod_btf = mod_btf;
8311
return id;
8312
}
8313
8314
static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
8315
struct extern_desc *ext)
8316
{
8317
const struct btf_type *targ_var, *targ_type;
8318
__u32 targ_type_id, local_type_id;
8319
struct module_btf *mod_btf = NULL;
8320
const char *targ_var_name;
8321
struct btf *btf = NULL;
8322
int id, err;
8323
8324
id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
8325
if (id < 0) {
8326
if (id == -ESRCH && ext->is_weak)
8327
return 0;
8328
pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
8329
ext->name);
8330
return id;
8331
}
8332
8333
/* find local type_id */
8334
local_type_id = ext->ksym.type_id;
8335
8336
/* find target type_id */
8337
targ_var = btf__type_by_id(btf, id);
8338
targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
8339
targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
8340
8341
err = bpf_core_types_are_compat(obj->btf, local_type_id,
8342
btf, targ_type_id);
8343
if (err <= 0) {
8344
const struct btf_type *local_type;
8345
const char *targ_name, *local_name;
8346
8347
local_type = btf__type_by_id(obj->btf, local_type_id);
8348
local_name = btf__name_by_offset(obj->btf, local_type->name_off);
8349
targ_name = btf__name_by_offset(btf, targ_type->name_off);
8350
8351
pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
8352
ext->name, local_type_id,
8353
btf_kind_str(local_type), local_name, targ_type_id,
8354
btf_kind_str(targ_type), targ_name);
8355
return -EINVAL;
8356
}
8357
8358
ext->is_set = true;
8359
ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
8360
ext->ksym.kernel_btf_id = id;
8361
pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
8362
ext->name, id, btf_kind_str(targ_var), targ_var_name);
8363
8364
return 0;
8365
}
8366
8367
static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
8368
struct extern_desc *ext)
8369
{
8370
int local_func_proto_id, kfunc_proto_id, kfunc_id;
8371
struct module_btf *mod_btf = NULL;
8372
const struct btf_type *kern_func;
8373
struct btf *kern_btf = NULL;
8374
int ret;
8375
8376
local_func_proto_id = ext->ksym.type_id;
8377
8378
kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf,
8379
&mod_btf);
8380
if (kfunc_id < 0) {
8381
if (kfunc_id == -ESRCH && ext->is_weak)
8382
return 0;
8383
pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
8384
ext->name);
8385
return kfunc_id;
8386
}
8387
8388
kern_func = btf__type_by_id(kern_btf, kfunc_id);
8389
kfunc_proto_id = kern_func->type;
8390
8391
ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
8392
kern_btf, kfunc_proto_id);
8393
if (ret <= 0) {
8394
if (ext->is_weak)
8395
return 0;
8396
8397
pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
8398
ext->name, local_func_proto_id,
8399
mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
8400
return -EINVAL;
8401
}
8402
8403
/* set index for module BTF fd in fd_array, if unset */
8404
if (mod_btf && !mod_btf->fd_array_idx) {
8405
/* insn->off is s16 */
8406
if (obj->fd_array_cnt == INT16_MAX) {
8407
pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
8408
ext->name, mod_btf->fd_array_idx);
8409
return -E2BIG;
8410
}
8411
/* Cannot use index 0 for module BTF fd */
8412
if (!obj->fd_array_cnt)
8413
obj->fd_array_cnt = 1;
8414
8415
ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
8416
obj->fd_array_cnt + 1);
8417
if (ret)
8418
return ret;
8419
mod_btf->fd_array_idx = obj->fd_array_cnt;
8420
/* we assume module BTF FD is always >0 */
8421
obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
8422
}
8423
8424
ext->is_set = true;
8425
ext->ksym.kernel_btf_id = kfunc_id;
8426
ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
8427
/* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
8428
* populates FD into ld_imm64 insn when it's used to point to kfunc.
8429
* {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
8430
* {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
8431
*/
8432
ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
8433
pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
8434
ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);
8435
8436
return 0;
8437
}
8438
8439
static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
8440
{
8441
const struct btf_type *t;
8442
struct extern_desc *ext;
8443
int i, err;
8444
8445
for (i = 0; i < obj->nr_extern; i++) {
8446
ext = &obj->externs[i];
8447
if (ext->type != EXT_KSYM || !ext->ksym.type_id)
8448
continue;
8449
8450
if (obj->gen_loader) {
8451
ext->is_set = true;
8452
ext->ksym.kernel_btf_obj_fd = 0;
8453
ext->ksym.kernel_btf_id = 0;
8454
continue;
8455
}
8456
t = btf__type_by_id(obj->btf, ext->btf_id);
8457
if (btf_is_var(t))
8458
err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
8459
else
8460
err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
8461
if (err)
8462
return err;
8463
}
8464
return 0;
8465
}
8466
8467
static int bpf_object__resolve_externs(struct bpf_object *obj,
8468
const char *extra_kconfig)
8469
{
8470
bool need_config = false, need_kallsyms = false;
8471
bool need_vmlinux_btf = false;
8472
struct extern_desc *ext;
8473
void *kcfg_data = NULL;
8474
int err, i;
8475
8476
if (obj->nr_extern == 0)
8477
return 0;
8478
8479
if (obj->kconfig_map_idx >= 0)
8480
kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
8481
8482
for (i = 0; i < obj->nr_extern; i++) {
8483
ext = &obj->externs[i];
8484
8485
if (ext->type == EXT_KSYM) {
8486
if (ext->ksym.type_id)
8487
need_vmlinux_btf = true;
8488
else
8489
need_kallsyms = true;
8490
continue;
8491
} else if (ext->type == EXT_KCFG) {
8492
void *ext_ptr = kcfg_data + ext->kcfg.data_off;
8493
__u64 value = 0;
8494
8495
/* Kconfig externs need actual /proc/config.gz */
8496
if (str_has_pfx(ext->name, "CONFIG_")) {
8497
need_config = true;
8498
continue;
8499
}
8500
8501
/* Virtual kcfg externs are handled specially by libbpf */
8502
if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
8503
value = get_kernel_version();
8504
if (!value) {
8505
pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
8506
return -EINVAL;
8507
}
8508
} else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
8509
value = kernel_supports(obj, FEAT_BPF_COOKIE);
8510
} else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
8511
value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
8512
} else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
8513
/* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
8514
* __kconfig externs, where LINUX_ ones are virtual and filled out
8515
* by libbpf itself (their values don't come from Kconfig).
8516
* If LINUX_xxx variable is not recognized by libbpf, but is marked
8517
* __weak, it defaults to zero value, just like for CONFIG_xxx
8518
* externs.
8519
*/
8520
pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
8521
return -EINVAL;
8522
}
8523
8524
err = set_kcfg_value_num(ext, ext_ptr, value);
8525
if (err)
8526
return err;
8527
pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
8528
ext->name, (long long)value);
8529
} else {
8530
pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
8531
return -EINVAL;
8532
}
8533
}
8534
if (need_config && extra_kconfig) {
8535
err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
8536
if (err)
8537
return -EINVAL;
8538
need_config = false;
8539
for (i = 0; i < obj->nr_extern; i++) {
8540
ext = &obj->externs[i];
8541
if (ext->type == EXT_KCFG && !ext->is_set) {
8542
need_config = true;
8543
break;
8544
}
8545
}
8546
}
8547
if (need_config) {
8548
err = bpf_object__read_kconfig_file(obj, kcfg_data);
8549
if (err)
8550
return -EINVAL;
8551
}
8552
if (need_kallsyms) {
8553
err = bpf_object__read_kallsyms_file(obj);
8554
if (err)
8555
return -EINVAL;
8556
}
8557
if (need_vmlinux_btf) {
8558
err = bpf_object__resolve_ksyms_btf_id(obj);
8559
if (err)
8560
return -EINVAL;
8561
}
8562
for (i = 0; i < obj->nr_extern; i++) {
8563
ext = &obj->externs[i];
8564
8565
if (!ext->is_set && !ext->is_weak) {
8566
pr_warn("extern '%s' (strong): not resolved\n", ext->name);
8567
return -ESRCH;
8568
} else if (!ext->is_set) {
8569
pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
8570
ext->name);
8571
}
8572
}
8573
8574
return 0;
8575
}
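/* Illustrative sketch (not part of libbpf): the externs resolved above are
* declared on the BPF side roughly as follows (macros from bpf_helpers.h;
* symbol and config names are hypothetical examples):
*
*	extern unsigned int LINUX_KERNEL_VERSION __kconfig;	// virtual, filled by libbpf
*	extern int CONFIG_HZ __kconfig;				// from /proc/config.gz
*	extern bool CONFIG_BPF_JIT __kconfig __weak;		// weak: defaults to 0 if unset
*	extern const void some_kernel_symbol __ksym;		// resolved via kallsyms/BTF
*/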
8576
8577
static void bpf_map_prepare_vdata(const struct bpf_map *map)
8578
{
8579
const struct btf_type *type;
8580
struct bpf_struct_ops *st_ops;
8581
__u32 i;
8582
8583
st_ops = map->st_ops;
8584
type = btf__type_by_id(map->obj->btf, st_ops->type_id);
8585
for (i = 0; i < btf_vlen(type); i++) {
8586
struct bpf_program *prog = st_ops->progs[i];
8587
void *kern_data;
8588
int prog_fd;
8589
8590
if (!prog)
8591
continue;
8592
8593
prog_fd = bpf_program__fd(prog);
8594
kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
8595
*(unsigned long *)kern_data = prog_fd;
8596
}
8597
}
8598
8599
static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
8600
{
8601
struct bpf_map *map;
8602
int i;
8603
8604
for (i = 0; i < obj->nr_maps; i++) {
8605
map = &obj->maps[i];
8606
8607
if (!bpf_map__is_struct_ops(map))
8608
continue;
8609
8610
if (!map->autocreate)
8611
continue;
8612
8613
bpf_map_prepare_vdata(map);
8614
}
8615
8616
return 0;
8617
}
8618
8619
static void bpf_object_unpin(struct bpf_object *obj)
8620
{
8621
int i;
8622
8623
/* unpin any maps that were auto-pinned during load */
8624
for (i = 0; i < obj->nr_maps; i++)
8625
if (obj->maps[i].pinned && !obj->maps[i].reused)
8626
bpf_map__unpin(&obj->maps[i], NULL);
8627
}
8628
8629
static void bpf_object_post_load_cleanup(struct bpf_object *obj)
8630
{
8631
int i;
8632
8633
/* clean up fd_array */
8634
zfree(&obj->fd_array);
8635
8636
/* clean up module BTFs */
8637
for (i = 0; i < obj->btf_module_cnt; i++) {
8638
close(obj->btf_modules[i].fd);
8639
btf__free(obj->btf_modules[i].btf);
8640
free(obj->btf_modules[i].name);
8641
}
8642
obj->btf_module_cnt = 0;
8643
zfree(&obj->btf_modules);
8644
8645
/* clean up vmlinux BTF */
8646
btf__free(obj->btf_vmlinux);
8647
obj->btf_vmlinux = NULL;
8648
}
8649
8650
static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path)
8651
{
8652
int err;
8653
8654
if (obj->state >= OBJ_PREPARED) {
8655
pr_warn("object '%s': prepare loading can't be attempted twice\n", obj->name);
8656
return -EINVAL;
8657
}
8658
8659
err = bpf_object_prepare_token(obj);
8660
err = err ? : bpf_object__probe_loading(obj);
8661
err = err ? : bpf_object__load_vmlinux_btf(obj, false);
8662
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
8663
err = err ? : bpf_object__sanitize_maps(obj);
8664
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
8665
err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
8666
err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
8667
err = err ? : bpf_object__sanitize_and_load_btf(obj);
8668
err = err ? : bpf_object__create_maps(obj);
8669
err = err ? : bpf_object_prepare_progs(obj);
8670
8671
if (err) {
8672
bpf_object_unpin(obj);
8673
bpf_object_unload(obj);
8674
obj->state = OBJ_LOADED;
8675
return err;
8676
}
8677
8678
obj->state = OBJ_PREPARED;
8679
return 0;
8680
}
8681
8682
static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
8683
{
8684
int err;
8685
8686
if (!obj)
8687
return libbpf_err(-EINVAL);
8688
8689
if (obj->state >= OBJ_LOADED) {
8690
pr_warn("object '%s': load can't be attempted twice\n", obj->name);
8691
return libbpf_err(-EINVAL);
8692
}
8693
8694
/* Disallow loading programs of non-native endianness into the kernel, but
8695
* permit cross-endian creation of "light skeleton".
8696
*/
8697
if (obj->gen_loader) {
8698
bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
8699
} else if (!is_native_endianness(obj)) {
8700
pr_warn("object '%s': loading non-native endianness is unsupported\n", obj->name);
8701
return libbpf_err(-LIBBPF_ERRNO__ENDIAN);
8702
}
8703
8704
if (obj->state < OBJ_PREPARED) {
8705
err = bpf_object_prepare(obj, target_btf_path);
8706
if (err)
8707
return libbpf_err(err);
8708
}
8709
err = bpf_object__load_progs(obj, extra_log_level);
8710
err = err ? : bpf_object_init_prog_arrays(obj);
8711
err = err ? : bpf_object_prepare_struct_ops(obj);
8712
8713
if (obj->gen_loader) {
8714
/* reset FDs */
8715
if (obj->btf)
8716
btf__set_fd(obj->btf, -1);
8717
if (!err)
8718
err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
8719
}
8720
8721
bpf_object_post_load_cleanup(obj);
8722
obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */
8723
8724
if (err) {
8725
bpf_object_unpin(obj);
8726
bpf_object_unload(obj);
8727
pr_warn("failed to load object '%s'\n", obj->path);
8728
return libbpf_err(err);
8729
}
8730
8731
return 0;
8732
}
8733
8734
int bpf_object__prepare(struct bpf_object *obj)
8735
{
8736
return libbpf_err(bpf_object_prepare(obj, NULL));
8737
}
8738
8739
int bpf_object__load(struct bpf_object *obj)
8740
{
8741
return bpf_object_load(obj, 0, NULL);
8742
}
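/* Illustrative sketch (not part of libbpf): bpf_object__prepare() splits map
* creation and relocation from program loading, so callers can adjust programs
* after maps exist but before the verifier sees anything:
*
*	err = bpf_object__prepare(obj);
*	if (err)
*		goto cleanup;
*	// ... tweak programs, set log buffers, read map FDs, etc. ...
*	err = bpf_object__load(obj);
*/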
8743
8744
static int make_parent_dir(const char *path)
8745
{
8746
char *dname, *dir;
8747
int err = 0;
8748
8749
dname = strdup(path);
8750
if (dname == NULL)
8751
return -ENOMEM;
8752
8753
dir = dirname(dname);
8754
if (mkdir(dir, 0700) && errno != EEXIST)
8755
err = -errno;
8756
8757
free(dname);
8758
if (err) {
8759
pr_warn("failed to mkdir %s: %s\n", path, errstr(err));
8760
}
8761
return err;
8762
}
8763
8764
static int check_path(const char *path)
8765
{
8766
struct statfs st_fs;
8767
char *dname, *dir;
8768
int err = 0;
8769
8770
if (path == NULL)
8771
return -EINVAL;
8772
8773
dname = strdup(path);
8774
if (dname == NULL)
8775
return -ENOMEM;
8776
8777
dir = dirname(dname);
8778
if (statfs(dir, &st_fs)) {
8779
pr_warn("failed to statfs %s: %s\n", dir, errstr(errno));
8780
err = -errno;
8781
}
8782
free(dname);
8783
8784
if (!err && st_fs.f_type != BPF_FS_MAGIC) {
8785
pr_warn("specified path %s is not on BPF FS\n", path);
8786
err = -EINVAL;
8787
}
8788
8789
return err;
8790
}
8791
8792
int bpf_program__pin(struct bpf_program *prog, const char *path)
8793
{
8794
int err;
8795
8796
if (prog->fd < 0) {
8797
pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
8798
return libbpf_err(-EINVAL);
8799
}
8800
8801
err = make_parent_dir(path);
8802
if (err)
8803
return libbpf_err(err);
8804
8805
err = check_path(path);
8806
if (err)
8807
return libbpf_err(err);
8808
8809
if (bpf_obj_pin(prog->fd, path)) {
8810
err = -errno;
8811
pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, errstr(err));
8812
return libbpf_err(err);
8813
}
8814
8815
pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
8816
return 0;
8817
}
8818
8819
int bpf_program__unpin(struct bpf_program *prog, const char *path)
8820
{
8821
int err;
8822
8823
if (prog->fd < 0) {
8824
pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
8825
return libbpf_err(-EINVAL);
8826
}
8827
8828
err = check_path(path);
8829
if (err)
8830
return libbpf_err(err);
8831
8832
err = unlink(path);
8833
if (err)
8834
return libbpf_err(-errno);
8835
8836
pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
8837
return 0;
8838
}
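/* Illustrative sketch (not part of libbpf): pinning a loaded program to bpffs
* so it outlives the loading process; the path must be on a BPF filesystem,
* typically mounted at /sys/fs/bpf:
*
*	err = bpf_program__pin(prog, "/sys/fs/bpf/my_prog");
*	...
*	err = bpf_program__unpin(prog, "/sys/fs/bpf/my_prog");
*/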
8839
8840
int bpf_map__pin(struct bpf_map *map, const char *path)
8841
{
8842
int err;
8843
8844
if (map == NULL) {
8845
pr_warn("invalid map pointer\n");
8846
return libbpf_err(-EINVAL);
8847
}
8848
8849
if (map->fd < 0) {
8850
pr_warn("map '%s': can't pin BPF map without FD (was it created?)\n", map->name);
8851
return libbpf_err(-EINVAL);
8852
}
8853
8854
if (map->pin_path) {
8855
if (path && strcmp(path, map->pin_path)) {
8856
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8857
bpf_map__name(map), map->pin_path, path);
8858
return libbpf_err(-EINVAL);
8859
} else if (map->pinned) {
8860
pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
8861
bpf_map__name(map), map->pin_path);
8862
return 0;
8863
}
8864
} else {
8865
if (!path) {
8866
pr_warn("missing a path to pin map '%s' at\n",
8867
bpf_map__name(map));
8868
return libbpf_err(-EINVAL);
8869
} else if (map->pinned) {
8870
pr_warn("map '%s' already pinned\n", bpf_map__name(map));
8871
return libbpf_err(-EEXIST);
8872
}
8873
8874
map->pin_path = strdup(path);
8875
if (!map->pin_path) {
8876
err = -errno;
8877
goto out_err;
8878
}
8879
}
8880
8881
err = make_parent_dir(map->pin_path);
8882
if (err)
8883
return libbpf_err(err);
8884
8885
err = check_path(map->pin_path);
8886
if (err)
8887
return libbpf_err(err);
8888
8889
if (bpf_obj_pin(map->fd, map->pin_path)) {
8890
err = -errno;
8891
goto out_err;
8892
}
8893
8894
map->pinned = true;
8895
pr_debug("pinned map '%s'\n", map->pin_path);
8896
8897
return 0;
8898
8899
out_err:
8900
pr_warn("failed to pin map: %s\n", errstr(err));
8901
return libbpf_err(err);
8902
}
8903
8904
int bpf_map__unpin(struct bpf_map *map, const char *path)
8905
{
8906
int err;
8907
8908
if (map == NULL) {
8909
pr_warn("invalid map pointer\n");
8910
return libbpf_err(-EINVAL);
8911
}
8912
8913
if (map->pin_path) {
8914
if (path && strcmp(path, map->pin_path)) {
8915
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8916
bpf_map__name(map), map->pin_path, path);
8917
return libbpf_err(-EINVAL);
8918
}
8919
path = map->pin_path;
8920
} else if (!path) {
8921
pr_warn("no path to unpin map '%s' from\n",
8922
bpf_map__name(map));
8923
return libbpf_err(-EINVAL);
8924
}
8925
8926
err = check_path(path);
8927
if (err)
8928
return libbpf_err(err);
8929
8930
err = unlink(path);
8931
if (err != 0)
8932
return libbpf_err(-errno);
8933
8934
map->pinned = false;
8935
pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
8936
8937
return 0;
8938
}
8939
8940
int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
8941
{
8942
char *new = NULL;
8943
8944
if (path) {
8945
new = strdup(path);
8946
if (!new)
8947
return libbpf_err(-errno);
8948
}
8949
8950
free(map->pin_path);
8951
map->pin_path = new;
8952
return 0;
8953
}
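/* Illustrative sketch (not part of libbpf): setting a pin path before load
* makes map creation reuse a compatible map already pinned at that path (or
* pin the freshly created one), which is how maps are shared across loaders:
*
*	err = bpf_map__set_pin_path(map, "/sys/fs/bpf/shared_counters");
*	err = err ?: bpf_object__load(obj);
*/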
8954
8955
__alias(bpf_map__pin_path)
8956
const char *bpf_map__get_pin_path(const struct bpf_map *map);
8957
8958
const char *bpf_map__pin_path(const struct bpf_map *map)
8959
{
8960
return map->pin_path;
8961
}
8962
8963
bool bpf_map__is_pinned(const struct bpf_map *map)
8964
{
8965
return map->pinned;
8966
}
8967
8968
static void sanitize_pin_path(char *s)
8969
{
8970
/* bpffs disallows periods in path names */
8971
while (*s) {
8972
if (*s == '.')
8973
*s = '_';
8974
s++;
8975
}
8976
}
8977
8978
int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
8979
{
8980
struct bpf_map *map;
8981
int err;
8982
8983
if (!obj)
8984
return libbpf_err(-ENOENT);
8985
8986
if (obj->state < OBJ_PREPARED) {
8987
pr_warn("object not yet loaded; load it first\n");
8988
return libbpf_err(-ENOENT);
8989
}
8990
8991
bpf_object__for_each_map(map, obj) {
8992
char *pin_path = NULL;
8993
char buf[PATH_MAX];
8994
8995
if (!map->autocreate)
8996
continue;
8997
8998
if (path) {
8999
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
9000
if (err)
9001
goto err_unpin_maps;
9002
sanitize_pin_path(buf);
9003
pin_path = buf;
9004
} else if (!map->pin_path) {
9005
continue;
9006
}
9007
9008
err = bpf_map__pin(map, pin_path);
9009
if (err)
9010
goto err_unpin_maps;
9011
}
9012
9013
return 0;
9014
9015
err_unpin_maps:
9016
while ((map = bpf_object__prev_map(obj, map))) {
9017
if (!map->pin_path)
9018
continue;
9019
9020
bpf_map__unpin(map, NULL);
9021
}
9022
9023
return libbpf_err(err);
9024
}
9025
9026
int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
9027
{
9028
struct bpf_map *map;
9029
int err;
9030
9031
if (!obj)
9032
return libbpf_err(-ENOENT);
9033
9034
bpf_object__for_each_map(map, obj) {
9035
char *pin_path = NULL;
9036
char buf[PATH_MAX];
9037
9038
if (path) {
9039
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
9040
if (err)
9041
return libbpf_err(err);
9042
sanitize_pin_path(buf);
9043
pin_path = buf;
9044
} else if (!map->pin_path) {
9045
continue;
9046
}
9047
9048
err = bpf_map__unpin(map, pin_path);
9049
if (err)
9050
return libbpf_err(err);
9051
}
9052
9053
return 0;
9054
}
9055
9056
int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
9057
{
9058
struct bpf_program *prog;
9059
char buf[PATH_MAX];
9060
int err;
9061
9062
if (!obj)
9063
return libbpf_err(-ENOENT);
9064
9065
if (obj->state < OBJ_LOADED) {
9066
pr_warn("object not yet loaded; load it first\n");
9067
return libbpf_err(-ENOENT);
9068
}
9069
9070
bpf_object__for_each_program(prog, obj) {
9071
err = pathname_concat(buf, sizeof(buf), path, prog->name);
9072
if (err)
9073
goto err_unpin_programs;
9074
9075
err = bpf_program__pin(prog, buf);
9076
if (err)
9077
goto err_unpin_programs;
9078
}
9079
9080
return 0;
9081
9082
err_unpin_programs:
9083
while ((prog = bpf_object__prev_program(obj, prog))) {
9084
if (pathname_concat(buf, sizeof(buf), path, prog->name))
9085
continue;
9086
9087
bpf_program__unpin(prog, buf);
9088
}
9089
9090
return libbpf_err(err);
9091
}
9092
9093
int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
9094
{
9095
struct bpf_program *prog;
9096
int err;
9097
9098
if (!obj)
9099
return libbpf_err(-ENOENT);
9100
9101
bpf_object__for_each_program(prog, obj) {
9102
char buf[PATH_MAX];
9103
9104
err = pathname_concat(buf, sizeof(buf), path, prog->name);
9105
if (err)
9106
return libbpf_err(err);
9107
9108
err = bpf_program__unpin(prog, buf);
9109
if (err)
9110
return libbpf_err(err);
9111
}
9112
9113
return 0;
9114
}
9115
9116
int bpf_object__pin(struct bpf_object *obj, const char *path)
9117
{
9118
int err;
9119
9120
err = bpf_object__pin_maps(obj, path);
9121
if (err)
9122
return libbpf_err(err);
9123
9124
err = bpf_object__pin_programs(obj, path);
9125
if (err) {
9126
bpf_object__unpin_maps(obj, path);
9127
return libbpf_err(err);
9128
}
9129
9130
return 0;
9131
}
9132
9133
int bpf_object__unpin(struct bpf_object *obj, const char *path)
9134
{
9135
int err;
9136
9137
err = bpf_object__unpin_programs(obj, path);
9138
if (err)
9139
return libbpf_err(err);
9140
9141
err = bpf_object__unpin_maps(obj, path);
9142
if (err)
9143
return libbpf_err(err);
9144
9145
return 0;
9146
}
9147
9148
static void bpf_map__destroy(struct bpf_map *map)
9149
{
9150
if (map->inner_map) {
9151
bpf_map__destroy(map->inner_map);
9152
zfree(&map->inner_map);
9153
}
9154
9155
zfree(&map->init_slots);
9156
map->init_slots_sz = 0;
9157
9158
if (map->mmaped && map->mmaped != map->obj->arena_data)
9159
munmap(map->mmaped, bpf_map_mmap_sz(map));
9160
map->mmaped = NULL;
9161
9162
if (map->st_ops) {
9163
zfree(&map->st_ops->data);
9164
zfree(&map->st_ops->progs);
9165
zfree(&map->st_ops->kern_func_off);
9166
zfree(&map->st_ops);
9167
}
9168
9169
zfree(&map->name);
9170
zfree(&map->real_name);
9171
zfree(&map->pin_path);
9172
9173
if (map->fd >= 0)
9174
zclose(map->fd);
9175
}
9176
9177
void bpf_object__close(struct bpf_object *obj)
9178
{
9179
size_t i;
9180
9181
if (IS_ERR_OR_NULL(obj))
9182
return;
9183
9184
/*
9185
* if user called bpf_object__prepare() without ever getting to
9186
* bpf_object__load(), we need to clean up stuff that is normally
9187
* cleaned up at the end of the loading step
9188
*/
9189
bpf_object_post_load_cleanup(obj);
9190
9191
usdt_manager_free(obj->usdt_man);
9192
obj->usdt_man = NULL;
9193
9194
bpf_gen__free(obj->gen_loader);
9195
bpf_object__elf_finish(obj);
9196
bpf_object_unload(obj);
9197
btf__free(obj->btf);
9198
btf__free(obj->btf_vmlinux);
9199
btf_ext__free(obj->btf_ext);
9200
9201
for (i = 0; i < obj->nr_maps; i++)
9202
bpf_map__destroy(&obj->maps[i]);
9203
9204
zfree(&obj->btf_custom_path);
9205
zfree(&obj->kconfig);
9206
9207
for (i = 0; i < obj->nr_extern; i++) {
9208
zfree(&obj->externs[i].name);
9209
zfree(&obj->externs[i].essent_name);
9210
}
9211
9212
zfree(&obj->externs);
9213
obj->nr_extern = 0;
9214
9215
zfree(&obj->maps);
9216
obj->nr_maps = 0;
9217
9218
if (obj->programs && obj->nr_programs) {
9219
for (i = 0; i < obj->nr_programs; i++)
9220
bpf_program__exit(&obj->programs[i]);
9221
}
9222
zfree(&obj->programs);
9223
9224
zfree(&obj->feat_cache);
9225
zfree(&obj->token_path);
9226
if (obj->token_fd > 0)
9227
close(obj->token_fd);
9228
9229
zfree(&obj->arena_data);
9230
9231
free(obj);
9232
}
9233
9234
const char *bpf_object__name(const struct bpf_object *obj)
9235
{
9236
return obj ? obj->name : libbpf_err_ptr(-EINVAL);
9237
}
9238
9239
unsigned int bpf_object__kversion(const struct bpf_object *obj)
9240
{
9241
return obj ? obj->kern_version : 0;
9242
}
9243
9244
int bpf_object__token_fd(const struct bpf_object *obj)
9245
{
9246
return obj->token_fd ?: -1;
9247
}
9248
9249
struct btf *bpf_object__btf(const struct bpf_object *obj)
9250
{
9251
return obj ? obj->btf : NULL;
9252
}
9253
9254
int bpf_object__btf_fd(const struct bpf_object *obj)
9255
{
9256
return obj->btf ? btf__fd(obj->btf) : -1;
9257
}
9258
9259
int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
9260
{
9261
if (obj->state >= OBJ_LOADED)
9262
return libbpf_err(-EINVAL);
9263
9264
obj->kern_version = kern_version;
9265
9266
return 0;
9267
}
9268
9269
int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
9270
{
9271
struct bpf_gen *gen;
9272
9273
if (!opts)
9274
return libbpf_err(-EFAULT);
9275
if (!OPTS_VALID(opts, gen_loader_opts))
9276
return libbpf_err(-EINVAL);
9277
gen = calloc(1, sizeof(*gen));
9278
if (!gen)
9279
return libbpf_err(-ENOMEM);
9280
gen->opts = opts;
9281
gen->swapped_endian = !is_native_endianness(obj);
9282
obj->gen_loader = gen;
9283
return 0;
9284
}
9285
9286
static struct bpf_program *
9287
__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
9288
bool forward)
9289
{
9290
size_t nr_programs = obj->nr_programs;
9291
ssize_t idx;
9292
9293
if (!nr_programs)
9294
return NULL;
9295
9296
if (!p)
9297
/* Iter from the beginning */
9298
return forward ? &obj->programs[0] :
9299
&obj->programs[nr_programs - 1];
9300
9301
if (p->obj != obj) {
9302
pr_warn("error: program handler doesn't match object\n");
9303
return errno = EINVAL, NULL;
9304
}
9305
9306
idx = (p - obj->programs) + (forward ? 1 : -1);
9307
if (idx >= obj->nr_programs || idx < 0)
9308
return NULL;
9309
return &obj->programs[idx];
9310
}
9311
9312
struct bpf_program *
9313
bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
9314
{
9315
struct bpf_program *prog = prev;
9316
9317
do {
9318
prog = __bpf_program__iter(prog, obj, true);
9319
} while (prog && prog_is_subprog(obj, prog));
9320
9321
return prog;
9322
}
9323
9324
struct bpf_program *
9325
bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
9326
{
9327
struct bpf_program *prog = next;
9328
9329
do {
9330
prog = __bpf_program__iter(prog, obj, false);
9331
} while (prog && prog_is_subprog(obj, prog));
9332
9333
return prog;
9334
}
9335
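/* Usage sketch (editor's illustration): iterating over all entry-point
 * programs of an object with the bpf_object__for_each_program() helper from
 * libbpf.h, which is built on bpf_object__next_program() above and therefore
 * skips subprograms.
 *
 *	struct bpf_program *prog;
 *
 *	bpf_object__for_each_program(prog, obj)
 *		printf("prog '%s' in section '%s'\n",
 *		       bpf_program__name(prog),
 *		       bpf_program__section_name(prog));
 */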
9336
void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
9337
{
9338
prog->prog_ifindex = ifindex;
9339
}
9340
9341
const char *bpf_program__name(const struct bpf_program *prog)
9342
{
9343
return prog->name;
9344
}
9345
9346
const char *bpf_program__section_name(const struct bpf_program *prog)
9347
{
9348
return prog->sec_name;
9349
}
9350
9351
bool bpf_program__autoload(const struct bpf_program *prog)
9352
{
9353
return prog->autoload;
9354
}
9355
9356
int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
9357
{
9358
if (prog->obj->state >= OBJ_LOADED)
9359
return libbpf_err(-EINVAL);
9360
9361
prog->autoload = autoload;
9362
return 0;
9363
}
9364
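/* Usage sketch (editor's illustration): disabling loading of one program
 * before bpf_object__load(), e.g. when a probed kernel feature is missing.
 * The program name "unsupported_probe" is hypothetical.
 *
 *	struct bpf_program *prog;
 *
 *	prog = bpf_object__find_program_by_name(obj, "unsupported_probe");
 *	if (prog)
 *		bpf_program__set_autoload(prog, false);
 */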
9365
bool bpf_program__autoattach(const struct bpf_program *prog)
9366
{
9367
return prog->autoattach;
9368
}
9369
9370
void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
9371
{
9372
prog->autoattach = autoattach;
9373
}
9374
9375
const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
9376
{
9377
return prog->insns;
9378
}
9379
9380
size_t bpf_program__insn_cnt(const struct bpf_program *prog)
9381
{
9382
return prog->insns_cnt;
9383
}
9384
9385
int bpf_program__set_insns(struct bpf_program *prog,
9386
struct bpf_insn *new_insns, size_t new_insn_cnt)
9387
{
9388
struct bpf_insn *insns;
9389
9390
if (prog->obj->state >= OBJ_LOADED)
9391
return libbpf_err(-EBUSY);
9392
9393
insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
9394
/* NULL is a valid return from reallocarray if the new count is zero */
9395
if (!insns && new_insn_cnt) {
9396
pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
9397
return libbpf_err(-ENOMEM);
9398
}
9399
memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
9400
9401
prog->insns = insns;
9402
prog->insns_cnt = new_insn_cnt;
9403
return 0;
9404
}
9405
9406
int bpf_program__fd(const struct bpf_program *prog)
9407
{
9408
if (!prog)
9409
return libbpf_err(-EINVAL);
9410
9411
if (prog->fd < 0)
9412
return libbpf_err(-ENOENT);
9413
9414
return prog->fd;
9415
}
9416
9417
__alias(bpf_program__type)
9418
enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
9419
9420
enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
9421
{
9422
return prog->type;
9423
}
9424
9425
static size_t custom_sec_def_cnt;
9426
static struct bpf_sec_def *custom_sec_defs;
9427
static struct bpf_sec_def custom_fallback_def;
9428
static bool has_custom_fallback_def;
9429
static int last_custom_sec_def_handler_id;
9430
9431
int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
9432
{
9433
if (prog->obj->state >= OBJ_LOADED)
9434
return libbpf_err(-EBUSY);
9435
9436
/* if type is not changed, do nothing */
9437
if (prog->type == type)
9438
return 0;
9439
9440
prog->type = type;
9441
9442
/* If a program type was changed, we need to reset associated SEC()
9443
* handler, as it will be invalid now. The only exception is a generic
9444
* fallback handler, which by definition is program type-agnostic and
9445
* is a catch-all custom handler, optionally set by the application,
9446
* so should be able to handle any type of BPF program.
9447
*/
9448
if (prog->sec_def != &custom_fallback_def)
9449
prog->sec_def = NULL;
9450
return 0;
9451
}
9452
9453
__alias(bpf_program__expected_attach_type)
9454
enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
9455
9456
enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
9457
{
9458
return prog->expected_attach_type;
9459
}
9460
9461
int bpf_program__set_expected_attach_type(struct bpf_program *prog,
9462
enum bpf_attach_type type)
9463
{
9464
if (prog->obj->state >= OBJ_LOADED)
9465
return libbpf_err(-EBUSY);
9466
9467
prog->expected_attach_type = type;
9468
return 0;
9469
}
9470
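/* Usage sketch (editor's illustration): overriding, before load, the program
 * type and expected attach type that were guessed from the SEC() name. The
 * chosen values are illustrative only.
 *
 *	bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SKB);
 *	bpf_program__set_expected_attach_type(prog, BPF_CGROUP_INET_EGRESS);
 */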
9471
__u32 bpf_program__flags(const struct bpf_program *prog)
9472
{
9473
return prog->prog_flags;
9474
}
9475
9476
int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
9477
{
9478
if (prog->obj->state >= OBJ_LOADED)
9479
return libbpf_err(-EBUSY);
9480
9481
prog->prog_flags = flags;
9482
return 0;
9483
}
9484
9485
__u32 bpf_program__log_level(const struct bpf_program *prog)
9486
{
9487
return prog->log_level;
9488
}
9489
9490
int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
9491
{
9492
if (prog->obj->state >= OBJ_LOADED)
9493
return libbpf_err(-EBUSY);
9494
9495
prog->log_level = log_level;
9496
return 0;
9497
}
9498
9499
const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
9500
{
9501
*log_size = prog->log_size;
9502
return prog->log_buf;
9503
}
9504
9505
int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
9506
{
9507
if (log_size && !log_buf)
9508
return libbpf_err(-EINVAL);
9509
if (prog->log_size > UINT_MAX)
9510
return libbpf_err(-EINVAL);
9511
if (prog->obj->state >= OBJ_LOADED)
9512
return libbpf_err(-EBUSY);
9513
9514
prog->log_buf = log_buf;
9515
prog->log_size = log_size;
9516
return 0;
9517
}
9518
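/* Usage sketch (editor's illustration): capturing the verifier log of one
 * program into a caller-owned buffer with a verbose log level. The buffer
 * size is arbitrary; the buffer must stay valid until bpf_object__load()
 * returns.
 *
 *	static char vlog[1024 * 1024];
 *
 *	bpf_program__set_log_buf(prog, vlog, sizeof(vlog));
 *	bpf_program__set_log_level(prog, 2);
 *	if (bpf_object__load(obj))
 *		fprintf(stderr, "verifier log:\n%s\n", vlog);
 */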
9519
struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog)
9520
{
9521
if (prog->func_info_rec_size != sizeof(struct bpf_func_info))
9522
return libbpf_err_ptr(-EOPNOTSUPP);
9523
return prog->func_info;
9524
}
9525
9526
__u32 bpf_program__func_info_cnt(const struct bpf_program *prog)
9527
{
9528
return prog->func_info_cnt;
9529
}
9530
9531
struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog)
9532
{
9533
if (prog->line_info_rec_size != sizeof(struct bpf_line_info))
9534
return libbpf_err_ptr(-EOPNOTSUPP);
9535
return prog->line_info;
9536
}
9537
9538
__u32 bpf_program__line_info_cnt(const struct bpf_program *prog)
9539
{
9540
return prog->line_info_cnt;
9541
}
9542
9543
#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
9544
.sec = (char *)sec_pfx, \
9545
.prog_type = BPF_PROG_TYPE_##ptype, \
9546
.expected_attach_type = atype, \
9547
.cookie = (long)(flags), \
9548
.prog_prepare_load_fn = libbpf_prepare_prog_load, \
9549
__VA_ARGS__ \
9550
}
9551
9552
static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9553
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9554
static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9555
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9556
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9557
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9558
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9559
static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9560
static int attach_kprobe_session(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9561
static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9562
static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9563
static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9564
9565
static const struct bpf_sec_def section_defs[] = {
9566
SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE),
9567
SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
9568
SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
9569
SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9570
SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9571
SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9572
SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9573
SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9574
SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9575
SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9576
SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9577
SEC_DEF("kprobe.session+", KPROBE, BPF_TRACE_KPROBE_SESSION, SEC_NONE, attach_kprobe_session),
9578
SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9579
SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9580
SEC_DEF("uprobe.session+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_NONE, attach_uprobe_multi),
9581
SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9582
SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9583
SEC_DEF("uprobe.session.s+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_SLEEPABLE, attach_uprobe_multi),
9584
SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9585
SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9586
SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt),
9587
SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt),
9588
SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
9589
SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */
9590
SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
9591
SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
9592
SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9593
SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9594
SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9595
SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE),
9596
SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE),
9597
SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9598
SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9599
SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9600
SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9601
SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9602
SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9603
SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
9604
SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
9605
SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
9606
SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
9607
SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9608
SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9609
SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9610
SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace),
9611
SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
9612
SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
9613
SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
9614
SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
9615
SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
9616
SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
9617
SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
9618
SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
9619
SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
9620
SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
9621
SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS),
9622
SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
9623
SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE),
9624
SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE),
9625
SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE),
9626
SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE),
9627
SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE),
9628
SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
9629
SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
9630
SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
9631
SEC_DEF("sk_skb/verdict", SK_SKB, BPF_SK_SKB_VERDICT, SEC_ATTACHABLE_OPT),
9632
SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE),
9633
SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
9634
SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
9635
SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
9636
SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
9637
SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
9638
SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE),
9639
SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
9640
SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
9641
SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
9642
SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
9643
SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
9644
SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
9645
SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
9646
SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
9647
SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
9648
SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
9649
SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
9650
SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
9651
SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
9652
SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
9653
SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
9654
SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
9655
SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
9656
SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
9657
SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
9658
SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
9659
SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
9660
SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
9661
SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
9662
SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
9663
SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
9664
SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
9665
SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE),
9666
SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE),
9667
SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
9668
SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE),
9669
};
9670
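/* For reference, a few SEC() names from BPF-side sources that the table above
 * resolves (editor's illustration; the kprobe/tracepoint targets shown are
 * hypothetical):
 *
 *	SEC("kprobe/do_unlinkat")   -> BPF_PROG_TYPE_KPROBE
 *	SEC("tp_btf/sched_switch")  -> BPF_PROG_TYPE_TRACING, BPF_TRACE_RAW_TP
 *	SEC("xdp")                  -> BPF_PROG_TYPE_XDP, BPF_XDP
 *	SEC("cgroup/connect4")      -> BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT
 */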
9671
int libbpf_register_prog_handler(const char *sec,
9672
enum bpf_prog_type prog_type,
9673
enum bpf_attach_type exp_attach_type,
9674
const struct libbpf_prog_handler_opts *opts)
9675
{
9676
struct bpf_sec_def *sec_def;
9677
9678
if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
9679
return libbpf_err(-EINVAL);
9680
9681
if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
9682
return libbpf_err(-E2BIG);
9683
9684
if (sec) {
9685
sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
9686
sizeof(*sec_def));
9687
if (!sec_def)
9688
return libbpf_err(-ENOMEM);
9689
9690
custom_sec_defs = sec_def;
9691
sec_def = &custom_sec_defs[custom_sec_def_cnt];
9692
} else {
9693
if (has_custom_fallback_def)
9694
return libbpf_err(-EBUSY);
9695
9696
sec_def = &custom_fallback_def;
9697
}
9698
9699
sec_def->sec = sec ? strdup(sec) : NULL;
9700
if (sec && !sec_def->sec)
9701
return libbpf_err(-ENOMEM);
9702
9703
sec_def->prog_type = prog_type;
9704
sec_def->expected_attach_type = exp_attach_type;
9705
sec_def->cookie = OPTS_GET(opts, cookie, 0);
9706
9707
sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
9708
sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
9709
sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
9710
9711
sec_def->handler_id = ++last_custom_sec_def_handler_id;
9712
9713
if (sec)
9714
custom_sec_def_cnt++;
9715
else
9716
has_custom_fallback_def = true;
9717
9718
return sec_def->handler_id;
9719
}
9720
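/* Usage sketch (editor's illustration): registering a custom handler so that
 * SEC("mycustom/...") programs are treated as tracepoint programs. The
 * "mycustom/" prefix is hypothetical; a real application would keep the
 * returned handler_id and unregister it when done.
 *
 *	LIBBPF_OPTS(libbpf_prog_handler_opts, opts);
 *	int handler_id;
 *
 *	handler_id = libbpf_register_prog_handler("mycustom/",
 *						  BPF_PROG_TYPE_TRACEPOINT, 0, &opts);
 *	// ... open/load objects that use SEC("mycustom/...") ...
 *	libbpf_unregister_prog_handler(handler_id);
 */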
9721
int libbpf_unregister_prog_handler(int handler_id)
9722
{
9723
struct bpf_sec_def *sec_defs;
9724
int i;
9725
9726
if (handler_id <= 0)
9727
return libbpf_err(-EINVAL);
9728
9729
if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
9730
memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
9731
has_custom_fallback_def = false;
9732
return 0;
9733
}
9734
9735
for (i = 0; i < custom_sec_def_cnt; i++) {
9736
if (custom_sec_defs[i].handler_id == handler_id)
9737
break;
9738
}
9739
9740
if (i == custom_sec_def_cnt)
9741
return libbpf_err(-ENOENT);
9742
9743
free(custom_sec_defs[i].sec);
9744
for (i = i + 1; i < custom_sec_def_cnt; i++)
9745
custom_sec_defs[i - 1] = custom_sec_defs[i];
9746
custom_sec_def_cnt--;
9747
9748
/* try to shrink the array, but it's ok if we couldn't */
9749
sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
9750
/* if new count is zero, reallocarray can return a valid NULL result;
9751
* in this case the previous pointer will be freed, so we *have to*
9752
* reassign old pointer to the new value (even if it's NULL)
9753
*/
9754
if (sec_defs || custom_sec_def_cnt == 0)
9755
custom_sec_defs = sec_defs;
9756
9757
return 0;
9758
}
9759
9760
static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
9761
{
9762
size_t len = strlen(sec_def->sec);
9763
9764
/* "type/" always has to have proper SEC("type/extras") form */
9765
if (sec_def->sec[len - 1] == '/') {
9766
if (str_has_pfx(sec_name, sec_def->sec))
9767
return true;
9768
return false;
9769
}
9770
9771
/* "type+" means it can be either exact SEC("type") or
9772
* well-formed SEC("type/extras") with proper '/' separator
9773
*/
9774
if (sec_def->sec[len - 1] == '+') {
9775
len--;
9776
/* not even a prefix */
9777
if (strncmp(sec_name, sec_def->sec, len) != 0)
9778
return false;
9779
/* exact match or has '/' separator */
9780
if (sec_name[len] == '\0' || sec_name[len] == '/')
9781
return true;
9782
return false;
9783
}
9784
9785
return strcmp(sec_name, sec_def->sec) == 0;
9786
}
9787
9788
static const struct bpf_sec_def *find_sec_def(const char *sec_name)
9789
{
9790
const struct bpf_sec_def *sec_def;
9791
int i, n;
9792
9793
n = custom_sec_def_cnt;
9794
for (i = 0; i < n; i++) {
9795
sec_def = &custom_sec_defs[i];
9796
if (sec_def_matches(sec_def, sec_name))
9797
return sec_def;
9798
}
9799
9800
n = ARRAY_SIZE(section_defs);
9801
for (i = 0; i < n; i++) {
9802
sec_def = &section_defs[i];
9803
if (sec_def_matches(sec_def, sec_name))
9804
return sec_def;
9805
}
9806
9807
if (has_custom_fallback_def)
9808
return &custom_fallback_def;
9809
9810
return NULL;
9811
}
9812
9813
#define MAX_TYPE_NAME_SIZE 32
9814
9815
static char *libbpf_get_type_names(bool attach_type)
9816
{
9817
int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
9818
char *buf;
9819
9820
buf = malloc(len);
9821
if (!buf)
9822
return NULL;
9823
9824
buf[0] = '\0';
9825
/* Build the string buf with all available names */
9826
for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9827
const struct bpf_sec_def *sec_def = &section_defs[i];
9828
9829
if (attach_type) {
9830
if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9831
continue;
9832
9833
if (!(sec_def->cookie & SEC_ATTACHABLE))
9834
continue;
9835
}
9836
9837
if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
9838
free(buf);
9839
return NULL;
9840
}
9841
strcat(buf, " ");
9842
strcat(buf, section_defs[i].sec);
9843
}
9844
9845
return buf;
9846
}
9847
9848
int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
9849
enum bpf_attach_type *expected_attach_type)
9850
{
9851
const struct bpf_sec_def *sec_def;
9852
char *type_names;
9853
9854
if (!name)
9855
return libbpf_err(-EINVAL);
9856
9857
sec_def = find_sec_def(name);
9858
if (sec_def) {
9859
*prog_type = sec_def->prog_type;
9860
*expected_attach_type = sec_def->expected_attach_type;
9861
return 0;
9862
}
9863
9864
pr_debug("failed to guess program type from ELF section '%s'\n", name);
9865
type_names = libbpf_get_type_names(false);
9866
if (type_names != NULL) {
9867
pr_debug("supported section(type) names are:%s\n", type_names);
9868
free(type_names);
9869
}
9870
9871
return libbpf_err(-ESRCH);
9872
}
9873
9874
const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
9875
{
9876
if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
9877
return NULL;
9878
9879
return attach_type_name[t];
9880
}
9881
9882
const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
9883
{
9884
if (t < 0 || t >= ARRAY_SIZE(link_type_name))
9885
return NULL;
9886
9887
return link_type_name[t];
9888
}
9889
9890
const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
9891
{
9892
if (t < 0 || t >= ARRAY_SIZE(map_type_name))
9893
return NULL;
9894
9895
return map_type_name[t];
9896
}
9897
9898
const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
9899
{
9900
if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
9901
return NULL;
9902
9903
return prog_type_name[t];
9904
}
9905
9906
static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
9907
int sec_idx,
9908
size_t offset)
9909
{
9910
struct bpf_map *map;
9911
size_t i;
9912
9913
for (i = 0; i < obj->nr_maps; i++) {
9914
map = &obj->maps[i];
9915
if (!bpf_map__is_struct_ops(map))
9916
continue;
9917
if (map->sec_idx == sec_idx &&
9918
map->sec_offset <= offset &&
9919
offset - map->sec_offset < map->def.value_size)
9920
return map;
9921
}
9922
9923
return NULL;
9924
}
9925
9926
/* Collect the relocations from ELF, populate st_ops->progs[], and update
9927
* st_ops->data for the shadow type.
9928
*/
9929
static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
9930
Elf64_Shdr *shdr, Elf_Data *data)
9931
{
9932
const struct btf_type *type;
9933
const struct btf_member *member;
9934
struct bpf_struct_ops *st_ops;
9935
struct bpf_program *prog;
9936
unsigned int shdr_idx;
9937
const struct btf *btf;
9938
struct bpf_map *map;
9939
unsigned int moff, insn_idx;
9940
const char *name;
9941
__u32 member_idx;
9942
Elf64_Sym *sym;
9943
Elf64_Rel *rel;
9944
int i, nrels;
9945
9946
btf = obj->btf;
9947
nrels = shdr->sh_size / shdr->sh_entsize;
9948
for (i = 0; i < nrels; i++) {
9949
rel = elf_rel_by_idx(data, i);
9950
if (!rel) {
9951
pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
9952
return -LIBBPF_ERRNO__FORMAT;
9953
}
9954
9955
sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
9956
if (!sym) {
9957
pr_warn("struct_ops reloc: symbol %zx not found\n",
9958
(size_t)ELF64_R_SYM(rel->r_info));
9959
return -LIBBPF_ERRNO__FORMAT;
9960
}
9961
9962
name = elf_sym_str(obj, sym->st_name) ?: "<?>";
9963
map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
9964
if (!map) {
9965
pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
9966
(size_t)rel->r_offset);
9967
return -EINVAL;
9968
}
9969
9970
moff = rel->r_offset - map->sec_offset;
9971
shdr_idx = sym->st_shndx;
9972
st_ops = map->st_ops;
9973
pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
9974
map->name,
9975
(long long)(rel->r_info >> 32),
9976
(long long)sym->st_value,
9977
shdr_idx, (size_t)rel->r_offset,
9978
map->sec_offset, sym->st_name, name);
9979
9980
if (shdr_idx >= SHN_LORESERVE) {
9981
pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
9982
map->name, (size_t)rel->r_offset, shdr_idx);
9983
return -LIBBPF_ERRNO__RELOC;
9984
}
9985
if (sym->st_value % BPF_INSN_SZ) {
9986
pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
9987
map->name, (unsigned long long)sym->st_value);
9988
return -LIBBPF_ERRNO__FORMAT;
9989
}
9990
insn_idx = sym->st_value / BPF_INSN_SZ;
9991
9992
type = btf__type_by_id(btf, st_ops->type_id);
9993
member = find_member_by_offset(type, moff * 8);
9994
if (!member) {
9995
pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
9996
map->name, moff);
9997
return -EINVAL;
9998
}
9999
member_idx = member - btf_members(type);
10000
name = btf__name_by_offset(btf, member->name_off);
10001
10002
if (!resolve_func_ptr(btf, member->type, NULL)) {
10003
pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
10004
map->name, name);
10005
return -EINVAL;
10006
}
10007
10008
prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
10009
if (!prog) {
10010
pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
10011
map->name, shdr_idx, name);
10012
return -EINVAL;
10013
}
10014
10015
/* prevent the use of BPF prog with invalid type */
10016
if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
10017
pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
10018
map->name, prog->name);
10019
return -EINVAL;
10020
}
10021
10022
st_ops->progs[member_idx] = prog;
10023
10024
/* st_ops->data will be exposed to users, being returned by
10025
* bpf_map__initial_value() as a pointer to the shadow
10026
* type. All function pointers in the original struct type
10027
* should be converted to a pointer to struct bpf_program
10028
* in the shadow type.
10029
*/
10030
*((struct bpf_program **)(st_ops->data + moff)) = prog;
10031
}
10032
10033
return 0;
10034
}
10035
10036
#define BTF_TRACE_PREFIX "btf_trace_"
10037
#define BTF_LSM_PREFIX "bpf_lsm_"
10038
#define BTF_ITER_PREFIX "bpf_iter_"
10039
#define BTF_MAX_NAME_SIZE 128
10040
10041
void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
10042
const char **prefix, int *kind)
10043
{
10044
switch (attach_type) {
10045
case BPF_TRACE_RAW_TP:
10046
*prefix = BTF_TRACE_PREFIX;
10047
*kind = BTF_KIND_TYPEDEF;
10048
break;
10049
case BPF_LSM_MAC:
10050
case BPF_LSM_CGROUP:
10051
*prefix = BTF_LSM_PREFIX;
10052
*kind = BTF_KIND_FUNC;
10053
break;
10054
case BPF_TRACE_ITER:
10055
*prefix = BTF_ITER_PREFIX;
10056
*kind = BTF_KIND_FUNC;
10057
break;
10058
default:
10059
*prefix = "";
10060
*kind = BTF_KIND_FUNC;
10061
}
10062
}
10063
10064
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
10065
const char *name, __u32 kind)
10066
{
10067
char btf_type_name[BTF_MAX_NAME_SIZE];
10068
int ret;
10069
10070
ret = snprintf(btf_type_name, sizeof(btf_type_name),
10071
"%s%s", prefix, name);
10072
/* snprintf returns the number of characters written excluding the
10073
* terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
10074
* indicates truncation.
10075
*/
10076
if (ret < 0 || ret >= sizeof(btf_type_name))
10077
return -ENAMETOOLONG;
10078
return btf__find_by_name_kind(btf, btf_type_name, kind);
10079
}
10080
10081
static inline int find_attach_btf_id(struct btf *btf, const char *name,
10082
enum bpf_attach_type attach_type)
10083
{
10084
const char *prefix;
10085
int kind;
10086
10087
btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
10088
return find_btf_by_prefix_kind(btf, prefix, name, kind);
10089
}
10090
10091
int libbpf_find_vmlinux_btf_id(const char *name,
10092
enum bpf_attach_type attach_type)
10093
{
10094
struct btf *btf;
10095
int err;
10096
10097
btf = btf__load_vmlinux_btf();
10098
err = libbpf_get_error(btf);
10099
if (err) {
10100
pr_warn("vmlinux BTF is not found\n");
10101
return libbpf_err(err);
10102
}
10103
10104
err = find_attach_btf_id(btf, name, attach_type);
10105
if (err <= 0)
10106
pr_warn("%s is not found in vmlinux BTF\n", name);
10107
10108
btf__free(btf);
10109
return libbpf_err(err);
10110
}
10111
10112
static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd)
10113
{
10114
struct bpf_prog_info info;
10115
__u32 info_len = sizeof(info);
10116
struct btf *btf;
10117
int err;
10118
10119
memset(&info, 0, info_len);
10120
err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
10121
if (err) {
10122
pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %s\n",
10123
attach_prog_fd, errstr(err));
10124
return err;
10125
}
10126
10127
err = -EINVAL;
10128
if (!info.btf_id) {
10129
pr_warn("The target program doesn't have BTF\n");
10130
goto out;
10131
}
10132
btf = btf_load_from_kernel(info.btf_id, NULL, token_fd);
10133
err = libbpf_get_error(btf);
10134
if (err) {
10135
pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err));
10136
goto out;
10137
}
10138
err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
10139
btf__free(btf);
10140
if (err <= 0) {
10141
pr_warn("%s is not found in prog's BTF\n", name);
10142
goto out;
10143
}
10144
out:
10145
return err;
10146
}
10147
10148
static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
10149
enum bpf_attach_type attach_type,
10150
int *btf_obj_fd, int *btf_type_id)
10151
{
10152
int ret, i, mod_len = 0;
10153
const char *fn_name, *mod_name = NULL;
10154
10155
fn_name = strchr(attach_name, ':');
10156
if (fn_name) {
10157
mod_name = attach_name;
10158
mod_len = fn_name - mod_name;
10159
fn_name++;
10160
}
10161
10162
if (!mod_name || strncmp(mod_name, "vmlinux", mod_len) == 0) {
10163
ret = find_attach_btf_id(obj->btf_vmlinux,
10164
mod_name ? fn_name : attach_name,
10165
attach_type);
10166
if (ret > 0) {
10167
*btf_obj_fd = 0; /* vmlinux BTF */
10168
*btf_type_id = ret;
10169
return 0;
10170
}
10171
if (ret != -ENOENT)
10172
return ret;
10173
}
10174
10175
ret = load_module_btfs(obj);
10176
if (ret)
10177
return ret;
10178
10179
for (i = 0; i < obj->btf_module_cnt; i++) {
10180
const struct module_btf *mod = &obj->btf_modules[i];
10181
10182
if (mod_name && strncmp(mod->name, mod_name, mod_len) != 0)
10183
continue;
10184
10185
ret = find_attach_btf_id(mod->btf,
10186
mod_name ? fn_name : attach_name,
10187
attach_type);
10188
if (ret > 0) {
10189
*btf_obj_fd = mod->fd;
10190
*btf_type_id = ret;
10191
return 0;
10192
}
10193
if (ret == -ENOENT)
10194
continue;
10195
10196
return ret;
10197
}
10198
10199
return -ESRCH;
10200
}
10201
10202
static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
10203
int *btf_obj_fd, int *btf_type_id)
10204
{
10205
enum bpf_attach_type attach_type = prog->expected_attach_type;
10206
__u32 attach_prog_fd = prog->attach_prog_fd;
10207
int err = 0;
10208
10209
/* BPF program's BTF ID */
10210
if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
10211
if (!attach_prog_fd) {
10212
pr_warn("prog '%s': attach program FD is not set\n", prog->name);
10213
return -EINVAL;
10214
}
10215
err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd);
10216
if (err < 0) {
10217
pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n",
10218
prog->name, attach_prog_fd, attach_name, errstr(err));
10219
return err;
10220
}
10221
*btf_obj_fd = 0;
10222
*btf_type_id = err;
10223
return 0;
10224
}
10225
10226
/* kernel/module BTF ID */
10227
if (prog->obj->gen_loader) {
10228
bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
10229
*btf_obj_fd = 0;
10230
*btf_type_id = 1;
10231
} else {
10232
err = find_kernel_btf_id(prog->obj, attach_name,
10233
attach_type, btf_obj_fd,
10234
btf_type_id);
10235
}
10236
if (err) {
10237
pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %s\n",
10238
prog->name, attach_name, errstr(err));
10239
return err;
10240
}
10241
return 0;
10242
}
10243
10244
int libbpf_attach_type_by_name(const char *name,
10245
enum bpf_attach_type *attach_type)
10246
{
10247
char *type_names;
10248
const struct bpf_sec_def *sec_def;
10249
10250
if (!name)
10251
return libbpf_err(-EINVAL);
10252
10253
sec_def = find_sec_def(name);
10254
if (!sec_def) {
10255
pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
10256
type_names = libbpf_get_type_names(true);
10257
if (type_names != NULL) {
10258
pr_debug("attachable section(type) names are:%s\n", type_names);
10259
free(type_names);
10260
}
10261
10262
return libbpf_err(-EINVAL);
10263
}
10264
10265
if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
10266
return libbpf_err(-EINVAL);
10267
if (!(sec_def->cookie & SEC_ATTACHABLE))
10268
return libbpf_err(-EINVAL);
10269
10270
*attach_type = sec_def->expected_attach_type;
10271
return 0;
10272
}
10273
10274
int bpf_map__fd(const struct bpf_map *map)
10275
{
10276
if (!map)
10277
return libbpf_err(-EINVAL);
10278
if (!map_is_created(map))
10279
return -1;
10280
return map->fd;
10281
}
10282
10283
static bool map_uses_real_name(const struct bpf_map *map)
10284
{
10285
/* Since libbpf started to support custom .data.* and .rodata.* maps,
10286
* their user-visible name differs from kernel-visible name. Users see
10287
* such map's corresponding ELF section name as a map name.
10288
* This check distinguishes .data/.rodata from .data.* and .rodata.*
10289
* maps to know which name has to be returned to the user.
10290
*/
10291
if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
10292
return true;
10293
if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
10294
return true;
10295
return false;
10296
}
10297
10298
const char *bpf_map__name(const struct bpf_map *map)
10299
{
10300
if (!map)
10301
return NULL;
10302
10303
if (map_uses_real_name(map))
10304
return map->real_name;
10305
10306
return map->name;
10307
}
10308
10309
enum bpf_map_type bpf_map__type(const struct bpf_map *map)
10310
{
10311
return map->def.type;
10312
}
10313
10314
int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
10315
{
10316
if (map_is_created(map))
10317
return libbpf_err(-EBUSY);
10318
map->def.type = type;
10319
return 0;
10320
}
10321
10322
__u32 bpf_map__map_flags(const struct bpf_map *map)
10323
{
10324
return map->def.map_flags;
10325
}
10326
10327
int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
10328
{
10329
if (map_is_created(map))
10330
return libbpf_err(-EBUSY);
10331
map->def.map_flags = flags;
10332
return 0;
10333
}
10334
10335
__u64 bpf_map__map_extra(const struct bpf_map *map)
10336
{
10337
return map->map_extra;
10338
}
10339
10340
int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
10341
{
10342
if (map_is_created(map))
10343
return libbpf_err(-EBUSY);
10344
map->map_extra = map_extra;
10345
return 0;
10346
}
10347
10348
__u32 bpf_map__numa_node(const struct bpf_map *map)
10349
{
10350
return map->numa_node;
10351
}
10352
10353
int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
10354
{
10355
if (map_is_created(map))
10356
return libbpf_err(-EBUSY);
10357
map->numa_node = numa_node;
10358
return 0;
10359
}
10360
10361
__u32 bpf_map__key_size(const struct bpf_map *map)
10362
{
10363
return map->def.key_size;
10364
}
10365
10366
int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
10367
{
10368
if (map_is_created(map))
10369
return libbpf_err(-EBUSY);
10370
map->def.key_size = size;
10371
return 0;
10372
}
10373
10374
__u32 bpf_map__value_size(const struct bpf_map *map)
10375
{
10376
return map->def.value_size;
10377
}
10378
10379
static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
10380
{
10381
struct btf *btf;
10382
struct btf_type *datasec_type, *var_type;
10383
struct btf_var_secinfo *var;
10384
const struct btf_type *array_type;
10385
const struct btf_array *array;
10386
int vlen, element_sz, new_array_id;
10387
__u32 nr_elements;
10388
10389
/* check btf existence */
10390
btf = bpf_object__btf(map->obj);
10391
if (!btf)
10392
return -ENOENT;
10393
10394
/* verify map is datasec */
10395
datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
10396
if (!btf_is_datasec(datasec_type)) {
10397
pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
10398
bpf_map__name(map));
10399
return -EINVAL;
10400
}
10401
10402
/* verify datasec has at least one var */
10403
vlen = btf_vlen(datasec_type);
10404
if (vlen == 0) {
10405
pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
10406
bpf_map__name(map));
10407
return -EINVAL;
10408
}
10409
10410
/* verify last var in the datasec is an array */
10411
var = &btf_var_secinfos(datasec_type)[vlen - 1];
10412
var_type = btf_type_by_id(btf, var->type);
10413
array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
10414
if (!btf_is_array(array_type)) {
10415
pr_warn("map '%s': cannot be resized, last var must be an array\n",
10416
bpf_map__name(map));
10417
return -EINVAL;
10418
}
10419
10420
/* verify request size aligns with array */
10421
array = btf_array(array_type);
10422
element_sz = btf__resolve_size(btf, array->type);
10423
if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
10424
pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
10425
bpf_map__name(map), element_sz, size);
10426
return -EINVAL;
10427
}
10428
10429
/* create a new array based on the existing array, but with new length */
10430
nr_elements = (size - var->offset) / element_sz;
10431
new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
10432
if (new_array_id < 0)
10433
return new_array_id;
10434
10435
/* adding a new btf type invalidates existing pointers to btf objects,
10436
* so refresh pointers before proceeding
10437
*/
10438
datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
10439
var = &btf_var_secinfos(datasec_type)[vlen - 1];
10440
var_type = btf_type_by_id(btf, var->type);
10441
10442
/* finally update btf info */
10443
datasec_type->size = size;
10444
var->size = size - var->offset;
10445
var_type->type = new_array_id;
10446
10447
return 0;
10448
}
10449
10450
int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
10451
{
10452
if (map_is_created(map))
10453
return libbpf_err(-EBUSY);
10454
10455
if (map->mmaped) {
10456
size_t mmap_old_sz, mmap_new_sz;
10457
int err;
10458
10459
if (map->def.type != BPF_MAP_TYPE_ARRAY)
10460
return libbpf_err(-EOPNOTSUPP);
10461
10462
mmap_old_sz = bpf_map_mmap_sz(map);
10463
mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries);
10464
err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
10465
if (err) {
10466
pr_warn("map '%s': failed to resize memory-mapped region: %s\n",
10467
bpf_map__name(map), errstr(err));
10468
return libbpf_err(err);
10469
}
10470
err = map_btf_datasec_resize(map, size);
10471
if (err && err != -ENOENT) {
10472
pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %s\n",
10473
bpf_map__name(map), errstr(err));
10474
map->btf_value_type_id = 0;
10475
map->btf_key_type_id = 0;
10476
}
10477
}
10478
10479
map->def.value_size = size;
10480
return 0;
10481
}
10482
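/* Usage sketch (editor's illustration): growing a global-data array before
 * load. Assumes the BPF side declares something like
 * "int my_vals[1] SEC(".data.my_vals");", so the last variable in the datasec
 * is an array, which is what the resize logic above requires.
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, ".data.my_vals");
 *
 *	if (map)
 *		bpf_map__set_value_size(map, 128 * sizeof(int));
 */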
10483
__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
10484
{
10485
return map ? map->btf_key_type_id : 0;
10486
}
10487
10488
__u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
10489
{
10490
return map ? map->btf_value_type_id : 0;
10491
}
10492
10493
int bpf_map__set_initial_value(struct bpf_map *map,
10494
const void *data, size_t size)
10495
{
10496
size_t actual_sz;
10497
10498
if (map_is_created(map))
10499
return libbpf_err(-EBUSY);
10500
10501
if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
10502
return libbpf_err(-EINVAL);
10503
10504
if (map->def.type == BPF_MAP_TYPE_ARENA)
10505
actual_sz = map->obj->arena_data_sz;
10506
else
10507
actual_sz = map->def.value_size;
10508
if (size != actual_sz)
10509
return libbpf_err(-EINVAL);
10510
10511
memcpy(map->mmaped, data, size);
10512
return 0;
10513
}
10514
10515
void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
10516
{
10517
if (bpf_map__is_struct_ops(map)) {
10518
if (psize)
10519
*psize = map->def.value_size;
10520
return map->st_ops->data;
10521
}
10522
10523
if (!map->mmaped)
10524
return NULL;
10525
10526
if (map->def.type == BPF_MAP_TYPE_ARENA)
10527
*psize = map->obj->arena_data_sz;
10528
else
10529
*psize = map->def.value_size;
10530
10531
return map->mmaped;
10532
}
10533
10534
bool bpf_map__is_internal(const struct bpf_map *map)
10535
{
10536
return map->libbpf_type != LIBBPF_MAP_UNSPEC;
10537
}
10538
10539
__u32 bpf_map__ifindex(const struct bpf_map *map)
10540
{
10541
return map->map_ifindex;
10542
}
10543
10544
int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
10545
{
10546
if (map_is_created(map))
10547
return libbpf_err(-EBUSY);
10548
map->map_ifindex = ifindex;
10549
return 0;
10550
}
10551
10552
int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
10553
{
10554
if (!bpf_map_type__is_map_in_map(map->def.type)) {
10555
pr_warn("error: unsupported map type\n");
10556
return libbpf_err(-EINVAL);
10557
}
10558
if (map->inner_map_fd != -1) {
10559
pr_warn("error: inner_map_fd already specified\n");
10560
return libbpf_err(-EINVAL);
10561
}
10562
if (map->inner_map) {
10563
bpf_map__destroy(map->inner_map);
10564
zfree(&map->inner_map);
10565
}
10566
map->inner_map_fd = fd;
10567
return 0;
10568
}
10569
10570
int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog)
10571
{
10572
if (map_is_created(map)) {
10573
pr_warn("exclusive programs must be set before map creation\n");
10574
return libbpf_err(-EINVAL);
10575
}
10576
10577
if (map->obj != prog->obj) {
10578
pr_warn("excl_prog and map must be from the same bpf object\n");
10579
return libbpf_err(-EINVAL);
10580
}
10581
10582
map->excl_prog = prog;
10583
return 0;
10584
}
10585
10586
struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map)
10587
{
10588
return map->excl_prog;
10589
}
10590
10591
static struct bpf_map *
10592
__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
10593
{
10594
ssize_t idx;
10595
struct bpf_map *s, *e;
10596
10597
if (!obj || !obj->maps)
10598
return errno = EINVAL, NULL;
10599
10600
s = obj->maps;
10601
e = obj->maps + obj->nr_maps;
10602
10603
if ((m < s) || (m >= e)) {
10604
pr_warn("error in %s: map handler doesn't belong to object\n",
10605
__func__);
10606
return errno = EINVAL, NULL;
10607
}
10608
10609
idx = (m - obj->maps) + i;
10610
if (idx >= obj->nr_maps || idx < 0)
10611
return NULL;
10612
return &obj->maps[idx];
10613
}
10614
10615
struct bpf_map *
10616
bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
10617
{
10618
if (prev == NULL && obj != NULL)
10619
return obj->maps;
10620
10621
return __bpf_map__iter(prev, obj, 1);
10622
}
10623
10624
struct bpf_map *
10625
bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
10626
{
10627
if (next == NULL && obj != NULL) {
10628
if (!obj->nr_maps)
10629
return NULL;
10630
return obj->maps + obj->nr_maps - 1;
10631
}
10632
10633
return __bpf_map__iter(next, obj, -1);
10634
}
10635
10636
struct bpf_map *
10637
bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
10638
{
10639
struct bpf_map *pos;
10640
10641
bpf_object__for_each_map(pos, obj) {
10642
/* if it's a special internal map name (which always starts
10643
* with dot) then check if that special name matches the
10644
* real map name (ELF section name)
10645
*/
10646
if (name[0] == '.') {
10647
if (pos->real_name && strcmp(pos->real_name, name) == 0)
10648
return pos;
10649
continue;
10650
}
10651
/* otherwise map name has to be an exact match */
10652
if (map_uses_real_name(pos)) {
10653
if (strcmp(pos->real_name, name) == 0)
10654
return pos;
10655
continue;
10656
}
10657
if (strcmp(pos->name, name) == 0)
10658
return pos;
10659
}
10660
return errno = ENOENT, NULL;
10661
}
10662
10663
int
10664
bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
10665
{
10666
return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
10667
}
10668
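/* Usage sketch (editor's illustration): looking up maps by name, including an
 * internal global-data map addressed by its ELF section name. The map name
 * "counters" is hypothetical.
 *
 *	struct bpf_map *counters = bpf_object__find_map_by_name(obj, "counters");
 *	struct bpf_map *rodata = bpf_object__find_map_by_name(obj, ".rodata");
 *	int counters_fd = bpf_object__find_map_fd_by_name(obj, "counters");
 */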
10669
static int validate_map_op(const struct bpf_map *map, size_t key_sz,
10670
size_t value_sz, bool check_value_sz)
10671
{
10672
if (!map_is_created(map)) /* map is not yet created */
10673
return -ENOENT;
10674
10675
if (map->def.key_size != key_sz) {
10676
pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
10677
map->name, key_sz, map->def.key_size);
10678
return -EINVAL;
10679
}
10680
10681
if (map->fd < 0) {
10682
pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
10683
return -EINVAL;
10684
}
10685
10686
if (!check_value_sz)
10687
return 0;
10688
10689
switch (map->def.type) {
10690
case BPF_MAP_TYPE_PERCPU_ARRAY:
10691
case BPF_MAP_TYPE_PERCPU_HASH:
10692
case BPF_MAP_TYPE_LRU_PERCPU_HASH:
10693
case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
10694
int num_cpu = libbpf_num_possible_cpus();
10695
size_t elem_sz = roundup(map->def.value_size, 8);
10696
10697
if (value_sz != num_cpu * elem_sz) {
10698
pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
10699
map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
10700
return -EINVAL;
10701
}
10702
break;
10703
}
10704
default:
10705
if (map->def.value_size != value_sz) {
10706
pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
10707
map->name, value_sz, map->def.value_size);
10708
return -EINVAL;
10709
}
10710
break;
10711
}
10712
return 0;
10713
}
10714
10715
int bpf_map__lookup_elem(const struct bpf_map *map,
10716
const void *key, size_t key_sz,
10717
void *value, size_t value_sz, __u64 flags)
10718
{
10719
int err;
10720
10721
err = validate_map_op(map, key_sz, value_sz, true);
10722
if (err)
10723
return libbpf_err(err);
10724
10725
return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
10726
}
10727
10728
int bpf_map__update_elem(const struct bpf_map *map,
10729
const void *key, size_t key_sz,
10730
const void *value, size_t value_sz, __u64 flags)
10731
{
10732
int err;
10733
10734
err = validate_map_op(map, key_sz, value_sz, true);
10735
if (err)
10736
return libbpf_err(err);
10737
10738
return bpf_map_update_elem(map->fd, key, value, flags);
10739
}
10740
10741
int bpf_map__delete_elem(const struct bpf_map *map,
10742
const void *key, size_t key_sz, __u64 flags)
10743
{
10744
int err;
10745
10746
err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10747
if (err)
10748
return libbpf_err(err);
10749
10750
return bpf_map_delete_elem_flags(map->fd, key, flags);
10751
}
10752
10753
int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
10754
const void *key, size_t key_sz,
10755
void *value, size_t value_sz, __u64 flags)
10756
{
10757
int err;
10758
10759
err = validate_map_op(map, key_sz, value_sz, true);
10760
if (err)
10761
return libbpf_err(err);
10762
10763
return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
10764
}
10765
10766
int bpf_map__get_next_key(const struct bpf_map *map,
10767
const void *cur_key, void *next_key, size_t key_sz)
10768
{
10769
int err;
10770
10771
err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10772
if (err)
10773
return libbpf_err(err);
10774
10775
return bpf_map_get_next_key(map->fd, cur_key, next_key);
10776
}
10777
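/* Usage sketch (editor's illustration): the size-checked element accessors
 * above in action. "hash_map" is assumed to have 4-byte keys and 8-byte
 * values; "percpu_map" is assumed to be a per-CPU map with 8-byte values, so
 * its value buffer must cover all possible CPUs with each element rounded up
 * to 8 bytes, matching validate_map_op().
 *
 *	__u32 key = 0;
 *	__u64 val = 42;
 *	int ncpus = libbpf_num_possible_cpus();
 *	__u64 *percpu_vals = calloc(ncpus, sizeof(__u64));
 *
 *	bpf_map__update_elem(hash_map, &key, sizeof(key), &val, sizeof(val), BPF_ANY);
 *	bpf_map__lookup_elem(percpu_map, &key, sizeof(key),
 *			     percpu_vals, ncpus * sizeof(__u64), 0);
 */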
10778
long libbpf_get_error(const void *ptr)
10779
{
10780
if (!IS_ERR_OR_NULL(ptr))
10781
return 0;
10782
10783
if (IS_ERR(ptr))
10784
errno = -PTR_ERR(ptr);
10785
10786
/* If ptr == NULL, then errno should already be set by the failing
10787
* API, because libbpf never returns NULL on success and it now always
10788
* sets errno on error. So no extra errno handling is needed for the
10789
* ptr == NULL case.
10790
*/
10791
return -errno;
10792
}
10793
10794
/* Replace link's underlying BPF program with the new one */
10795
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
10796
{
10797
int ret;
10798
int prog_fd = bpf_program__fd(prog);
10799
10800
if (prog_fd < 0) {
10801
pr_warn("prog '%s': can't use BPF program without FD (was it loaded?)\n",
10802
prog->name);
10803
return libbpf_err(-EINVAL);
10804
}
10805
10806
ret = bpf_link_update(bpf_link__fd(link), prog_fd, NULL);
10807
return libbpf_err_errno(ret);
10808
}
10809
10810
/* Release "ownership" of underlying BPF resource (typically, BPF program
10811
* attached to some BPF hook, e.g., tracepoint, kprobe, etc). A disconnected
10812
* link, when destroyed through a bpf_link__destroy() call, won't attempt to
10813
* detach/unregister that BPF resource. This is useful in situations where,
10814
* say, the attached BPF program has to outlive the userspace program that
10815
* attached it in the system. Depending on the type of BPF program, though,
10816
* there might be additional steps (like pinning the BPF program in BPF FS)
10817
* necessary to ensure that exit of the userspace program doesn't trigger
10818
* automatic detachment and cleanup inside the kernel.
10819
*/
10820
void bpf_link__disconnect(struct bpf_link *link)
10821
{
10822
link->disconnected = true;
10823
}
10824
10825
int bpf_link__destroy(struct bpf_link *link)
10826
{
10827
int err = 0;
10828
10829
if (IS_ERR_OR_NULL(link))
10830
return 0;
10831
10832
if (!link->disconnected && link->detach)
10833
err = link->detach(link);
10834
if (link->pin_path)
10835
free(link->pin_path);
10836
if (link->dealloc)
10837
link->dealloc(link);
10838
else
10839
free(link);
10840
10841
return libbpf_err(err);
10842
}
10843
10844
int bpf_link__fd(const struct bpf_link *link)
10845
{
10846
return link->fd;
10847
}
10848
10849
const char *bpf_link__pin_path(const struct bpf_link *link)
10850
{
10851
return link->pin_path;
10852
}
10853
10854
static int bpf_link__detach_fd(struct bpf_link *link)
10855
{
10856
return libbpf_err_errno(close(link->fd));
10857
}
10858
10859
struct bpf_link *bpf_link__open(const char *path)
10860
{
10861
struct bpf_link *link;
10862
int fd;
10863
10864
fd = bpf_obj_get(path);
10865
if (fd < 0) {
10866
fd = -errno;
10867
pr_warn("failed to open link at %s: %d\n", path, fd);
10868
return libbpf_err_ptr(fd);
10869
}
10870
10871
link = calloc(1, sizeof(*link));
10872
if (!link) {
10873
close(fd);
10874
return libbpf_err_ptr(-ENOMEM);
10875
}
10876
link->detach = &bpf_link__detach_fd;
10877
link->fd = fd;
10878
10879
link->pin_path = strdup(path);
10880
if (!link->pin_path) {
10881
bpf_link__destroy(link);
10882
return libbpf_err_ptr(-ENOMEM);
10883
}
10884
10885
return link;
10886
}
10887
10888
int bpf_link__detach(struct bpf_link *link)
10889
{
10890
return bpf_link_detach(link->fd) ? -errno : 0;
10891
}
10892
10893
int bpf_link__pin(struct bpf_link *link, const char *path)
10894
{
10895
int err;
10896
10897
if (link->pin_path)
10898
return libbpf_err(-EBUSY);
10899
err = make_parent_dir(path);
10900
if (err)
10901
return libbpf_err(err);
10902
err = check_path(path);
10903
if (err)
10904
return libbpf_err(err);
10905
10906
link->pin_path = strdup(path);
10907
if (!link->pin_path)
10908
return libbpf_err(-ENOMEM);
10909
10910
if (bpf_obj_pin(link->fd, link->pin_path)) {
10911
err = -errno;
10912
zfree(&link->pin_path);
10913
return libbpf_err(err);
10914
}
10915
10916
pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
10917
return 0;
10918
}
10919
10920
int bpf_link__unpin(struct bpf_link *link)
10921
{
10922
int err;
10923
10924
if (!link->pin_path)
10925
return libbpf_err(-EINVAL);
10926
10927
err = unlink(link->pin_path);
10928
if (err != 0)
10929
return -errno;
10930
10931
pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
10932
zfree(&link->pin_path);
10933
return 0;
10934
}
10935
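/* Usage sketch (editor's illustration): pinning a link so the attachment
 * outlives the process that created it. The pin path is hypothetical. The
 * bpffs pin holds a kernel reference, so closing the link fd via
 * bpf_link__destroy() afterwards does not detach the program.
 *
 *	struct bpf_link *link = bpf_program__attach(prog);
 *
 *	if (!link)
 *		return -errno;
 *	if (bpf_link__pin(link, "/sys/fs/bpf/mylink"))
 *		fprintf(stderr, "failed to pin link\n");
 *	bpf_link__destroy(link);
 *	// later: link = bpf_link__open("/sys/fs/bpf/mylink"); bpf_link__unpin(link);
 */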
10936
struct bpf_link_perf {
10937
struct bpf_link link;
10938
int perf_event_fd;
10939
/* legacy kprobe support: keep track of probe identifier and type */
10940
char *legacy_probe_name;
10941
bool legacy_is_kprobe;
10942
bool legacy_is_retprobe;
10943
};
10944
10945
static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
10946
static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
10947
10948
static int bpf_link_perf_detach(struct bpf_link *link)
10949
{
10950
struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10951
int err = 0;
10952
10953
if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
10954
err = -errno;
10955
10956
if (perf_link->perf_event_fd != link->fd)
10957
close(perf_link->perf_event_fd);
10958
close(link->fd);
10959
10960
/* legacy uprobe/kprobe needs to be removed after perf event fd closure */
10961
if (perf_link->legacy_probe_name) {
10962
if (perf_link->legacy_is_kprobe) {
10963
err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
10964
perf_link->legacy_is_retprobe);
10965
} else {
10966
err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
10967
perf_link->legacy_is_retprobe);
10968
}
10969
}
10970
10971
return err;
10972
}
10973
10974
static void bpf_link_perf_dealloc(struct bpf_link *link)
10975
{
10976
struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10977
10978
free(perf_link->legacy_probe_name);
10979
free(perf_link);
10980
}
10981
10982
struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
10983
const struct bpf_perf_event_opts *opts)
10984
{
10985
struct bpf_link_perf *link;
10986
int prog_fd, link_fd = -1, err;
10987
bool force_ioctl_attach;
10988
10989
if (!OPTS_VALID(opts, bpf_perf_event_opts))
10990
return libbpf_err_ptr(-EINVAL);
10991
10992
if (pfd < 0) {
10993
pr_warn("prog '%s': invalid perf event FD %d\n",
10994
prog->name, pfd);
10995
return libbpf_err_ptr(-EINVAL);
10996
}
10997
prog_fd = bpf_program__fd(prog);
10998
if (prog_fd < 0) {
10999
pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
11000
prog->name);
11001
return libbpf_err_ptr(-EINVAL);
11002
}
11003
11004
link = calloc(1, sizeof(*link));
11005
if (!link)
11006
return libbpf_err_ptr(-ENOMEM);
11007
link->link.detach = &bpf_link_perf_detach;
11008
link->link.dealloc = &bpf_link_perf_dealloc;
11009
link->perf_event_fd = pfd;
11010
11011
force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
11012
if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
11013
DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
11014
.perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
11015
11016
link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
11017
if (link_fd < 0) {
11018
err = -errno;
11019
pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %s\n",
11020
prog->name, pfd, errstr(err));
11021
goto err_out;
11022
}
11023
link->link.fd = link_fd;
11024
} else {
11025
if (OPTS_GET(opts, bpf_cookie, 0)) {
11026
pr_warn("prog '%s': user context value is not supported\n", prog->name);
11027
err = -EOPNOTSUPP;
11028
goto err_out;
11029
}
11030
11031
if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
11032
err = -errno;
11033
pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
11034
prog->name, pfd, errstr(err));
11035
if (err == -EPROTO)
11036
pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
11037
prog->name, pfd);
11038
goto err_out;
11039
}
11040
link->link.fd = pfd;
11041
}
11042
11043
if (!OPTS_GET(opts, dont_enable, false)) {
11044
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
11045
err = -errno;
11046
pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
11047
prog->name, pfd, errstr(err));
11048
goto err_out;
11049
}
11050
}
11051
11052
return &link->link;
11053
err_out:
11054
if (link_fd >= 0)
11055
close(link_fd);
11056
free(link);
11057
return libbpf_err_ptr(err);
11058
}
11059
11060
struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
11061
{
11062
return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
11063
}
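/*
 * Example usage (illustrative sketch, not part of libbpf itself): open a
 * software CPU-clock sampling perf event and attach an already loaded
 * perf_event BPF program to it. The 99 Hz sampling frequency is an arbitrary
 * example value.
 */
static inline struct bpf_link *example_attach_cpu_clock(struct bpf_program *prog)
{
	struct perf_event_attr attr;
	struct bpf_link *link;
	int pfd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.freq = 1;
	attr.sample_freq = 99;

	pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
	if (pfd < 0)
		return NULL;

	link = bpf_program__attach_perf_event(prog, pfd);
	if (libbpf_get_error(link)) {
		close(pfd);
		return NULL;
	}
	return link;
}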
11064
11065
/*
11066
* this function is expected to parse an integer in the range of [0, 2^31-1] from
11067
* the given file using scanf format string fmt. If the actual parsed value is
11068
* negative, the result might be indistinguishable from an error
11069
*/
11070
static int parse_uint_from_file(const char *file, const char *fmt)
11071
{
11072
int err, ret;
11073
FILE *f;
11074
11075
f = fopen(file, "re");
11076
if (!f) {
11077
err = -errno;
11078
pr_debug("failed to open '%s': %s\n", file, errstr(err));
11079
return err;
11080
}
11081
err = fscanf(f, fmt, &ret);
11082
if (err != 1) {
11083
err = err == EOF ? -EIO : -errno;
11084
pr_debug("failed to parse '%s': %s\n", file, errstr(err));
11085
fclose(f);
11086
return err;
11087
}
11088
fclose(f);
11089
return ret;
11090
}
11091
11092
static int determine_kprobe_perf_type(void)
11093
{
11094
const char *file = "/sys/bus/event_source/devices/kprobe/type";
11095
11096
return parse_uint_from_file(file, "%d\n");
11097
}
11098
11099
static int determine_uprobe_perf_type(void)
11100
{
11101
const char *file = "/sys/bus/event_source/devices/uprobe/type";
11102
11103
return parse_uint_from_file(file, "%d\n");
11104
}
11105
11106
static int determine_kprobe_retprobe_bit(void)
11107
{
11108
const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
11109
11110
return parse_uint_from_file(file, "config:%d\n");
11111
}
11112
11113
static int determine_uprobe_retprobe_bit(void)
11114
{
11115
const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
11116
11117
return parse_uint_from_file(file, "config:%d\n");
11118
}
11119
11120
#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
11121
#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
11122
11123
static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
11124
uint64_t offset, int pid, size_t ref_ctr_off)
11125
{
11126
const size_t attr_sz = sizeof(struct perf_event_attr);
11127
struct perf_event_attr attr;
11128
int type, pfd;
11129
11130
if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
11131
return -EINVAL;
11132
11133
memset(&attr, 0, attr_sz);
11134
11135
type = uprobe ? determine_uprobe_perf_type()
11136
: determine_kprobe_perf_type();
11137
if (type < 0) {
11138
pr_warn("failed to determine %s perf type: %s\n",
11139
uprobe ? "uprobe" : "kprobe",
11140
errstr(type));
11141
return type;
11142
}
11143
if (retprobe) {
11144
int bit = uprobe ? determine_uprobe_retprobe_bit()
11145
: determine_kprobe_retprobe_bit();
11146
11147
if (bit < 0) {
11148
pr_warn("failed to determine %s retprobe bit: %s\n",
11149
uprobe ? "uprobe" : "kprobe",
11150
errstr(bit));
11151
return bit;
11152
}
11153
attr.config |= 1 << bit;
11154
}
11155
attr.size = attr_sz;
11156
attr.type = type;
11157
attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
11158
attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
11159
attr.config2 = offset; /* kprobe_addr or probe_offset */
11160
11161
/* pid filter is meaningful only for uprobes */
11162
pfd = syscall(__NR_perf_event_open, &attr,
11163
pid < 0 ? -1 : pid /* pid */,
11164
pid == -1 ? 0 : -1 /* cpu */,
11165
-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11166
return pfd >= 0 ? pfd : -errno;
11167
}
11168
11169
static int append_to_file(const char *file, const char *fmt, ...)
11170
{
11171
int fd, n, err = 0;
11172
va_list ap;
11173
char buf[1024];
11174
11175
va_start(ap, fmt);
11176
n = vsnprintf(buf, sizeof(buf), fmt, ap);
11177
va_end(ap);
11178
11179
if (n < 0 || n >= sizeof(buf))
11180
return -EINVAL;
11181
11182
fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
11183
if (fd < 0)
11184
return -errno;
11185
11186
if (write(fd, buf, n) < 0)
11187
err = -errno;
11188
11189
close(fd);
11190
return err;
11191
}
11192
11193
#define DEBUGFS "/sys/kernel/debug/tracing"
11194
#define TRACEFS "/sys/kernel/tracing"
11195
11196
static bool use_debugfs(void)
11197
{
11198
static int has_debugfs = -1;
11199
11200
if (has_debugfs < 0)
11201
has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
11202
11203
return has_debugfs == 1;
11204
}
11205
11206
static const char *tracefs_path(void)
11207
{
11208
return use_debugfs() ? DEBUGFS : TRACEFS;
11209
}
11210
11211
static const char *tracefs_kprobe_events(void)
11212
{
11213
return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
11214
}
11215
11216
static const char *tracefs_uprobe_events(void)
11217
{
11218
return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
11219
}
11220
11221
static const char *tracefs_available_filter_functions(void)
11222
{
11223
return use_debugfs() ? DEBUGFS"/available_filter_functions"
11224
: TRACEFS"/available_filter_functions";
11225
}
11226
11227
static const char *tracefs_available_filter_functions_addrs(void)
11228
{
11229
return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs"
11230
: TRACEFS"/available_filter_functions_addrs";
11231
}
11232
11233
static void gen_probe_legacy_event_name(char *buf, size_t buf_sz,
11234
const char *name, size_t offset)
11235
{
11236
static int index = 0;
11237
int i;
11238
11239
snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(),
11240
__sync_fetch_and_add(&index, 1), name, offset);
11241
11242
/* sanitize non-alphanumeric characters in the probe name */
11243
for (i = 0; buf[i]; i++) {
11244
if (!isalnum(buf[i]))
11245
buf[i] = '_';
11246
}
11247
}
11248
11249
static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
11250
const char *kfunc_name, size_t offset)
11251
{
11252
return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
11253
retprobe ? 'r' : 'p',
11254
retprobe ? "kretprobes" : "kprobes",
11255
probe_name, kfunc_name, offset);
11256
}
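/*
 * For example, add_kprobe_event_legacy("my_probe", false, "do_sys_open", 0)
 * (with illustrative probe and function names) appends the line
 * "p:kprobes/my_probe do_sys_open+0x0" to <tracefs>/kprobe_events.
 */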
11257
11258
static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
11259
{
11260
return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
11261
retprobe ? "kretprobes" : "kprobes", probe_name);
11262
}
11263
11264
static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
11265
{
11266
char file[256];
11267
11268
snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11269
tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
11270
11271
return parse_uint_from_file(file, "%d\n");
11272
}
11273
11274
static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
11275
const char *kfunc_name, size_t offset, int pid)
11276
{
11277
const size_t attr_sz = sizeof(struct perf_event_attr);
11278
struct perf_event_attr attr;
11279
int type, pfd, err;
11280
11281
err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
11282
if (err < 0) {
11283
pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
11284
kfunc_name, offset,
11285
errstr(err));
11286
return err;
11287
}
11288
type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
11289
if (type < 0) {
11290
err = type;
11291
pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
11292
kfunc_name, offset,
11293
errstr(err));
11294
goto err_clean_legacy;
11295
}
11296
11297
memset(&attr, 0, attr_sz);
11298
attr.size = attr_sz;
11299
attr.config = type;
11300
attr.type = PERF_TYPE_TRACEPOINT;
11301
11302
pfd = syscall(__NR_perf_event_open, &attr,
11303
pid < 0 ? -1 : pid, /* pid */
11304
pid == -1 ? 0 : -1, /* cpu */
11305
-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11306
if (pfd < 0) {
11307
err = -errno;
11308
pr_warn("legacy kprobe perf_event_open() failed: %s\n",
11309
errstr(err));
11310
goto err_clean_legacy;
11311
}
11312
return pfd;
11313
11314
err_clean_legacy:
11315
/* Clear the newly added legacy kprobe_event */
11316
remove_kprobe_event_legacy(probe_name, retprobe);
11317
return err;
11318
}
11319
11320
static const char *arch_specific_syscall_pfx(void)
11321
{
11322
#if defined(__x86_64__)
11323
return "x64";
11324
#elif defined(__i386__)
11325
return "ia32";
11326
#elif defined(__s390x__)
11327
return "s390x";
11328
#elif defined(__s390__)
11329
return "s390";
11330
#elif defined(__arm__)
11331
return "arm";
11332
#elif defined(__aarch64__)
11333
return "arm64";
11334
#elif defined(__mips__)
11335
return "mips";
11336
#elif defined(__riscv)
11337
return "riscv";
11338
#elif defined(__powerpc__)
11339
return "powerpc";
11340
#elif defined(__powerpc64__)
11341
return "powerpc64";
11342
#else
11343
return NULL;
11344
#endif
11345
}
11346
11347
int probe_kern_syscall_wrapper(int token_fd)
11348
{
11349
char syscall_name[64];
11350
const char *ksys_pfx;
11351
11352
ksys_pfx = arch_specific_syscall_pfx();
11353
if (!ksys_pfx)
11354
return 0;
11355
11356
snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
11357
11358
if (determine_kprobe_perf_type() >= 0) {
11359
int pfd;
11360
11361
pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
11362
if (pfd >= 0)
11363
close(pfd);
11364
11365
return pfd >= 0 ? 1 : 0;
11366
} else { /* legacy mode */
11367
char probe_name[MAX_EVENT_NAME_LEN];
11368
11369
gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
11370
if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
11371
return 0;
11372
11373
(void)remove_kprobe_event_legacy(probe_name, false);
11374
return 1;
11375
}
11376
}
11377
11378
struct bpf_link *
11379
bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
11380
const char *func_name,
11381
const struct bpf_kprobe_opts *opts)
11382
{
11383
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11384
enum probe_attach_mode attach_mode;
11385
char *legacy_probe = NULL;
11386
struct bpf_link *link;
11387
size_t offset;
11388
bool retprobe, legacy;
11389
int pfd, err;
11390
11391
if (!OPTS_VALID(opts, bpf_kprobe_opts))
11392
return libbpf_err_ptr(-EINVAL);
11393
11394
attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
11395
retprobe = OPTS_GET(opts, retprobe, false);
11396
offset = OPTS_GET(opts, offset, 0);
11397
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11398
11399
legacy = determine_kprobe_perf_type() < 0;
11400
switch (attach_mode) {
11401
case PROBE_ATTACH_MODE_LEGACY:
11402
legacy = true;
11403
pe_opts.force_ioctl_attach = true;
11404
break;
11405
case PROBE_ATTACH_MODE_PERF:
11406
if (legacy)
11407
return libbpf_err_ptr(-ENOTSUP);
11408
pe_opts.force_ioctl_attach = true;
11409
break;
11410
case PROBE_ATTACH_MODE_LINK:
11411
if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
11412
return libbpf_err_ptr(-ENOTSUP);
11413
break;
11414
case PROBE_ATTACH_MODE_DEFAULT:
11415
break;
11416
default:
11417
return libbpf_err_ptr(-EINVAL);
11418
}
11419
11420
if (!legacy) {
11421
pfd = perf_event_open_probe(false /* uprobe */, retprobe,
11422
func_name, offset,
11423
-1 /* pid */, 0 /* ref_ctr_off */);
11424
} else {
11425
char probe_name[MAX_EVENT_NAME_LEN];
11426
11427
gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
11428
func_name, offset);
11429
11430
legacy_probe = strdup(probe_name);
11431
if (!legacy_probe)
11432
return libbpf_err_ptr(-ENOMEM);
11433
11434
pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
11435
offset, -1 /* pid */);
11436
}
11437
if (pfd < 0) {
11438
err = -errno;
11439
pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
11440
prog->name, retprobe ? "kretprobe" : "kprobe",
11441
func_name, offset,
11442
errstr(err));
11443
goto err_out;
11444
}
11445
link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11446
err = libbpf_get_error(link);
11447
if (err) {
11448
close(pfd);
11449
pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
11450
prog->name, retprobe ? "kretprobe" : "kprobe",
11451
func_name, offset,
11452
errstr(err));
11453
goto err_clean_legacy;
11454
}
11455
if (legacy) {
11456
struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11457
11458
perf_link->legacy_probe_name = legacy_probe;
11459
perf_link->legacy_is_kprobe = true;
11460
perf_link->legacy_is_retprobe = retprobe;
11461
}
11462
11463
return link;
11464
11465
err_clean_legacy:
11466
if (legacy)
11467
remove_kprobe_event_legacy(legacy_probe, retprobe);
11468
err_out:
11469
free(legacy_probe);
11470
return libbpf_err_ptr(err);
11471
}
11472
11473
struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
11474
bool retprobe,
11475
const char *func_name)
11476
{
11477
DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
11478
.retprobe = retprobe,
11479
);
11480
11481
return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
11482
}
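/*
 * Example usage (illustrative sketch, not part of libbpf itself): attach a
 * kretprobe with a BPF cookie to a kernel function; the function name and
 * cookie value are arbitrary examples.
 */
static inline struct bpf_link *example_attach_kretprobe(struct bpf_program *prog)
{
	LIBBPF_OPTS(bpf_kprobe_opts, opts,
		.retprobe = true,
		.bpf_cookie = 0x1234,
	);

	return bpf_program__attach_kprobe_opts(prog, "do_sys_openat2", &opts);
}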
11483
11484
struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
11485
const char *syscall_name,
11486
const struct bpf_ksyscall_opts *opts)
11487
{
11488
LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
11489
char func_name[128];
11490
11491
if (!OPTS_VALID(opts, bpf_ksyscall_opts))
11492
return libbpf_err_ptr(-EINVAL);
11493
11494
if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
11495
/* arch_specific_syscall_pfx() should never return NULL here
11496
* because it is guarded by kernel_supports(). However, since
11497
* the compiler does not know that, we have an explicit conditional
11498
* as well.
11499
*/
11500
snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
11501
arch_specific_syscall_pfx() ? : "", syscall_name);
11502
} else {
11503
snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
11504
}
11505
11506
kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
11507
kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11508
11509
return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
11510
}
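/*
 * Example usage (illustrative sketch, not part of libbpf itself): attach to
 * the entry of the openat() syscall by name, letting libbpf pick the
 * arch-specific wrapper symbol.
 */
static inline struct bpf_link *example_attach_ksyscall(struct bpf_program *prog)
{
	LIBBPF_OPTS(bpf_ksyscall_opts, opts, .retprobe = false);

	return bpf_program__attach_ksyscall(prog, "openat", &opts);
}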
11511
11512
/* Adapted from perf/util/string.c */
11513
bool glob_match(const char *str, const char *pat)
11514
{
11515
while (*str && *pat && *pat != '*') {
11516
if (*pat == '?') { /* Matches any single character */
11517
str++;
11518
pat++;
11519
continue;
11520
}
11521
if (*str != *pat)
11522
return false;
11523
str++;
11524
pat++;
11525
}
11526
/* Check wild card */
11527
if (*pat == '*') {
11528
while (*pat == '*')
11529
pat++;
11530
if (!*pat) /* Tail wild card matches all */
11531
return true;
11532
while (*str)
11533
if (glob_match(str++, pat))
11534
return true;
11535
}
11536
return !*str && !*pat;
11537
}
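/*
 * For example, glob_match("bpf_map_update_elem", "bpf_*") and
 * glob_match("tcp_v4_rcv", "tcp_v?_rcv") both return true, while
 * glob_match("udp_rcv", "tcp_*") returns false.
 */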
11538
11539
struct kprobe_multi_resolve {
11540
const char *pattern;
11541
unsigned long *addrs;
11542
size_t cap;
11543
size_t cnt;
11544
};
11545
11546
struct avail_kallsyms_data {
11547
char **syms;
11548
size_t cnt;
11549
struct kprobe_multi_resolve *res;
11550
};
11551
11552
static int avail_func_cmp(const void *a, const void *b)
11553
{
11554
return strcmp(*(const char **)a, *(const char **)b);
11555
}
11556
11557
static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
11558
const char *sym_name, void *ctx)
11559
{
11560
struct avail_kallsyms_data *data = ctx;
11561
struct kprobe_multi_resolve *res = data->res;
11562
int err;
11563
11564
if (!glob_match(sym_name, res->pattern))
11565
return 0;
11566
11567
if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) {
11568
/* Some kernel versions strip out the .llvm.<hash> suffix from
11569
* function names reported in available_filter_functions, but
11570
* don't do so for kallsyms. While this is clearly a kernel
11571
* bug (fixed by [0]) we try to accommodate that in libbpf to
11572
* make multi-kprobe usability a bit better: if no match is
11573
* found, we will strip .llvm. suffix and try one more time.
11574
*
11575
* [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG")
11576
*/
11577
char sym_trim[256], *psym_trim = sym_trim, *sym_sfx;
11578
11579
if (!(sym_sfx = strstr(sym_name, ".llvm.")))
11580
return 0;
11581
11582
/* psym_trim vs sym_trim dance is done to avoid pointer vs array
11583
* coercion differences and get proper `const char **` pointer
11584
* which avail_func_cmp() expects
11585
*/
11586
snprintf(sym_trim, sizeof(sym_trim), "%.*s", (int)(sym_sfx - sym_name), sym_name);
11587
if (!bsearch(&psym_trim, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
11588
return 0;
11589
}
11590
11591
err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
11592
if (err)
11593
return err;
11594
11595
res->addrs[res->cnt++] = (unsigned long)sym_addr;
11596
return 0;
11597
}
11598
11599
static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res)
11600
{
11601
const char *available_functions_file = tracefs_available_filter_functions();
11602
struct avail_kallsyms_data data;
11603
char sym_name[500];
11604
FILE *f;
11605
int err = 0, ret, i;
11606
char **syms = NULL;
11607
size_t cap = 0, cnt = 0;
11608
11609
f = fopen(available_functions_file, "re");
11610
if (!f) {
11611
err = -errno;
11612
pr_warn("failed to open %s: %s\n", available_functions_file, errstr(err));
11613
return err;
11614
}
11615
11616
while (true) {
11617
char *name;
11618
11619
ret = fscanf(f, "%499s%*[^\n]\n", sym_name);
11620
if (ret == EOF && feof(f))
11621
break;
11622
11623
if (ret != 1) {
11624
pr_warn("failed to parse available_filter_functions entry: %d\n", ret);
11625
err = -EINVAL;
11626
goto cleanup;
11627
}
11628
11629
if (!glob_match(sym_name, res->pattern))
11630
continue;
11631
11632
err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1);
11633
if (err)
11634
goto cleanup;
11635
11636
name = strdup(sym_name);
11637
if (!name) {
11638
err = -errno;
11639
goto cleanup;
11640
}
11641
11642
syms[cnt++] = name;
11643
}
11644
11645
/* no entries found, bail out */
11646
if (cnt == 0) {
11647
err = -ENOENT;
11648
goto cleanup;
11649
}
11650
11651
/* sort available functions */
11652
qsort(syms, cnt, sizeof(*syms), avail_func_cmp);
11653
11654
data.syms = syms;
11655
data.res = res;
11656
data.cnt = cnt;
11657
libbpf_kallsyms_parse(avail_kallsyms_cb, &data);
11658
11659
if (res->cnt == 0)
11660
err = -ENOENT;
11661
11662
cleanup:
11663
for (i = 0; i < cnt; i++)
11664
free((char *)syms[i]);
11665
free(syms);
11666
11667
fclose(f);
11668
return err;
11669
}
11670
11671
static bool has_available_filter_functions_addrs(void)
11672
{
11673
return access(tracefs_available_filter_functions_addrs(), R_OK) != -1;
11674
}
11675
11676
static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res)
11677
{
11678
const char *available_path = tracefs_available_filter_functions_addrs();
11679
char sym_name[500];
11680
FILE *f;
11681
int ret, err = 0;
11682
unsigned long long sym_addr;
11683
11684
f = fopen(available_path, "re");
11685
if (!f) {
11686
err = -errno;
11687
pr_warn("failed to open %s: %s\n", available_path, errstr(err));
11688
return err;
11689
}
11690
11691
while (true) {
11692
ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name);
11693
if (ret == EOF && feof(f))
11694
break;
11695
11696
if (ret != 2) {
11697
pr_warn("failed to parse available_filter_functions_addrs entry: %d\n",
11698
ret);
11699
err = -EINVAL;
11700
goto cleanup;
11701
}
11702
11703
if (!glob_match(sym_name, res->pattern))
11704
continue;
11705
11706
err = libbpf_ensure_mem((void **)&res->addrs, &res->cap,
11707
sizeof(*res->addrs), res->cnt + 1);
11708
if (err)
11709
goto cleanup;
11710
11711
res->addrs[res->cnt++] = (unsigned long)sym_addr;
11712
}
11713
11714
if (res->cnt == 0)
11715
err = -ENOENT;
11716
11717
cleanup:
11718
fclose(f);
11719
return err;
11720
}
11721
11722
struct bpf_link *
11723
bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
11724
const char *pattern,
11725
const struct bpf_kprobe_multi_opts *opts)
11726
{
11727
LIBBPF_OPTS(bpf_link_create_opts, lopts);
11728
struct kprobe_multi_resolve res = {
11729
.pattern = pattern,
11730
};
11731
enum bpf_attach_type attach_type;
11732
struct bpf_link *link = NULL;
11733
const unsigned long *addrs;
11734
int err, link_fd, prog_fd;
11735
bool retprobe, session, unique_match;
11736
const __u64 *cookies;
11737
const char **syms;
11738
size_t cnt;
11739
11740
if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
11741
return libbpf_err_ptr(-EINVAL);
11742
11743
prog_fd = bpf_program__fd(prog);
11744
if (prog_fd < 0) {
11745
pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
11746
prog->name);
11747
return libbpf_err_ptr(-EINVAL);
11748
}
11749
11750
syms = OPTS_GET(opts, syms, false);
11751
addrs = OPTS_GET(opts, addrs, false);
11752
cnt = OPTS_GET(opts, cnt, false);
11753
cookies = OPTS_GET(opts, cookies, false);
11754
unique_match = OPTS_GET(opts, unique_match, false);
11755
11756
if (!pattern && !addrs && !syms)
11757
return libbpf_err_ptr(-EINVAL);
11758
if (pattern && (addrs || syms || cookies || cnt))
11759
return libbpf_err_ptr(-EINVAL);
11760
if (!pattern && !cnt)
11761
return libbpf_err_ptr(-EINVAL);
11762
if (!pattern && unique_match)
11763
return libbpf_err_ptr(-EINVAL);
11764
if (addrs && syms)
11765
return libbpf_err_ptr(-EINVAL);
11766
11767
if (pattern) {
11768
if (has_available_filter_functions_addrs())
11769
err = libbpf_available_kprobes_parse(&res);
11770
else
11771
err = libbpf_available_kallsyms_parse(&res);
11772
if (err)
11773
goto error;
11774
11775
if (unique_match && res.cnt != 1) {
11776
pr_warn("prog '%s': failed to find a unique match for '%s' (%zu matches)\n",
11777
prog->name, pattern, res.cnt);
11778
err = -EINVAL;
11779
goto error;
11780
}
11781
11782
addrs = res.addrs;
11783
cnt = res.cnt;
11784
}
11785
11786
retprobe = OPTS_GET(opts, retprobe, false);
11787
session = OPTS_GET(opts, session, false);
11788
11789
if (retprobe && session)
11790
return libbpf_err_ptr(-EINVAL);
11791
11792
attach_type = session ? BPF_TRACE_KPROBE_SESSION : BPF_TRACE_KPROBE_MULTI;
11793
11794
lopts.kprobe_multi.syms = syms;
11795
lopts.kprobe_multi.addrs = addrs;
11796
lopts.kprobe_multi.cookies = cookies;
11797
lopts.kprobe_multi.cnt = cnt;
11798
lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
11799
11800
link = calloc(1, sizeof(*link));
11801
if (!link) {
11802
err = -ENOMEM;
11803
goto error;
11804
}
11805
link->detach = &bpf_link__detach_fd;
11806
11807
link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts);
11808
if (link_fd < 0) {
11809
err = -errno;
11810
pr_warn("prog '%s': failed to attach: %s\n",
11811
prog->name, errstr(err));
11812
goto error;
11813
}
11814
link->fd = link_fd;
11815
free(res.addrs);
11816
return link;
11817
11818
error:
11819
free(link);
11820
free(res.addrs);
11821
return libbpf_err_ptr(err);
11822
}
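/*
 * Example usage (illustrative sketch, not part of libbpf itself): attach a
 * single multi-kprobe link to every kernel function matching a glob pattern;
 * the "tcp_*" pattern is an arbitrary example.
 */
static inline struct bpf_link *example_attach_kprobe_multi(struct bpf_program *prog)
{
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .retprobe = false);

	return bpf_program__attach_kprobe_multi_opts(prog, "tcp_*", &opts);
}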
11823
11824
static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11825
{
11826
DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
11827
unsigned long offset = 0;
11828
const char *func_name;
11829
char *func;
11830
int n;
11831
11832
*link = NULL;
11833
11834
/* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
11835
if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
11836
return 0;
11837
11838
opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
11839
if (opts.retprobe)
11840
func_name = prog->sec_name + sizeof("kretprobe/") - 1;
11841
else
11842
func_name = prog->sec_name + sizeof("kprobe/") - 1;
11843
11844
n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
11845
if (n < 1) {
11846
pr_warn("kprobe name is invalid: %s\n", func_name);
11847
return -EINVAL;
11848
}
11849
if (opts.retprobe && offset != 0) {
11850
free(func);
11851
pr_warn("kretprobes do not support offset specification\n");
11852
return -EINVAL;
11853
}
11854
11855
opts.offset = offset;
11856
*link = bpf_program__attach_kprobe_opts(prog, func, &opts);
11857
free(func);
11858
return libbpf_get_error(*link);
11859
}
11860
11861
static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11862
{
11863
LIBBPF_OPTS(bpf_ksyscall_opts, opts);
11864
const char *syscall_name;
11865
11866
*link = NULL;
11867
11868
/* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
11869
if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
11870
return 0;
11871
11872
opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
11873
if (opts.retprobe)
11874
syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
11875
else
11876
syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
11877
11878
*link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
11879
return *link ? 0 : -errno;
11880
}
11881
11882
static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11883
{
11884
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
11885
const char *spec;
11886
char *pattern;
11887
int n;
11888
11889
*link = NULL;
11890
11891
/* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
11892
if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
11893
strcmp(prog->sec_name, "kretprobe.multi") == 0)
11894
return 0;
11895
11896
opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
11897
if (opts.retprobe)
11898
spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
11899
else
11900
spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
11901
11902
n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
11903
if (n < 1) {
11904
pr_warn("kprobe multi pattern is invalid: %s\n", spec);
11905
return -EINVAL;
11906
}
11907
11908
*link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
11909
free(pattern);
11910
return libbpf_get_error(*link);
11911
}
11912
11913
static int attach_kprobe_session(const struct bpf_program *prog, long cookie,
11914
struct bpf_link **link)
11915
{
11916
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .session = true);
11917
const char *spec;
11918
char *pattern;
11919
int n;
11920
11921
*link = NULL;
11922
11923
/* no auto-attach for SEC("kprobe.session") */
11924
if (strcmp(prog->sec_name, "kprobe.session") == 0)
11925
return 0;
11926
11927
spec = prog->sec_name + sizeof("kprobe.session/") - 1;
11928
n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
11929
if (n < 1) {
11930
pr_warn("kprobe session pattern is invalid: %s\n", spec);
11931
return -EINVAL;
11932
}
11933
11934
*link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
11935
free(pattern);
11936
return *link ? 0 : -errno;
11937
}
11938
11939
static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11940
{
11941
char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
11942
LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
11943
int n, ret = -EINVAL;
11944
11945
*link = NULL;
11946
11947
n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
11948
&probe_type, &binary_path, &func_name);
11949
switch (n) {
11950
case 1:
11951
/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
11952
ret = 0;
11953
break;
11954
case 3:
11955
opts.session = str_has_pfx(probe_type, "uprobe.session");
11956
opts.retprobe = str_has_pfx(probe_type, "uretprobe.multi");
11957
11958
*link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
11959
ret = libbpf_get_error(*link);
11960
break;
11961
default:
11962
pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
11963
prog->sec_name);
11964
break;
11965
}
11966
free(probe_type);
11967
free(binary_path);
11968
free(func_name);
11969
return ret;
11970
}
11971
11972
static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
11973
const char *binary_path, size_t offset)
11974
{
11975
return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
11976
retprobe ? 'r' : 'p',
11977
retprobe ? "uretprobes" : "uprobes",
11978
probe_name, binary_path, offset);
11979
}
11980
11981
static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
11982
{
11983
return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
11984
retprobe ? "uretprobes" : "uprobes", probe_name);
11985
}
11986
11987
static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
11988
{
11989
char file[512];
11990
11991
snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11992
tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
11993
11994
return parse_uint_from_file(file, "%d\n");
11995
}
11996
11997
static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
11998
const char *binary_path, size_t offset, int pid)
11999
{
12000
const size_t attr_sz = sizeof(struct perf_event_attr);
12001
struct perf_event_attr attr;
12002
int type, pfd, err;
12003
12004
err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
12005
if (err < 0) {
12006
pr_warn("failed to add legacy uprobe event for %s:0x%zx: %s\n",
12007
binary_path, (size_t)offset, errstr(err));
12008
return err;
12009
}
12010
type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
12011
if (type < 0) {
12012
err = type;
12013
pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %s\n",
12014
binary_path, offset, errstr(err));
12015
goto err_clean_legacy;
12016
}
12017
12018
memset(&attr, 0, attr_sz);
12019
attr.size = attr_sz;
12020
attr.config = type;
12021
attr.type = PERF_TYPE_TRACEPOINT;
12022
12023
pfd = syscall(__NR_perf_event_open, &attr,
12024
pid < 0 ? -1 : pid, /* pid */
12025
pid == -1 ? 0 : -1, /* cpu */
12026
-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
12027
if (pfd < 0) {
12028
err = -errno;
12029
pr_warn("legacy uprobe perf_event_open() failed: %s\n", errstr(err));
12030
goto err_clean_legacy;
12031
}
12032
return pfd;
12033
12034
err_clean_legacy:
12035
/* Clear the newly added legacy uprobe_event */
12036
remove_uprobe_event_legacy(probe_name, retprobe);
12037
return err;
12038
}
12039
12040
/* Find offset of function name in archive specified by path. Currently
12041
* supported are .zip files that do not compress their contents, as used on
12042
* Android in the form of APKs, for example. "file_name" is the name of the ELF
12043
* file inside the archive. "func_name" matches symbol name or name@@LIB for
12044
* library functions.
12045
*
12046
* An overview of the APK format is provided here:
12047
* https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
12048
*/
12049
static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
12050
const char *func_name)
12051
{
12052
struct zip_archive *archive;
12053
struct zip_entry entry;
12054
long ret;
12055
Elf *elf;
12056
12057
archive = zip_archive_open(archive_path);
12058
if (IS_ERR(archive)) {
12059
ret = PTR_ERR(archive);
12060
pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
12061
return ret;
12062
}
12063
12064
ret = zip_archive_find_entry(archive, file_name, &entry);
12065
if (ret) {
12066
pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
12067
archive_path, ret);
12068
goto out;
12069
}
12070
pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
12071
(unsigned long)entry.data_offset);
12072
12073
if (entry.compression) {
12074
pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
12075
archive_path);
12076
ret = -LIBBPF_ERRNO__FORMAT;
12077
goto out;
12078
}
12079
12080
elf = elf_memory((void *)entry.data, entry.data_length);
12081
if (!elf) {
12082
pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
12083
elf_errmsg(-1));
12084
ret = -LIBBPF_ERRNO__LIBELF;
12085
goto out;
12086
}
12087
12088
ret = elf_find_func_offset(elf, file_name, func_name);
12089
if (ret > 0) {
12090
pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
12091
func_name, file_name, archive_path, entry.data_offset, ret,
12092
ret + entry.data_offset);
12093
ret += entry.data_offset;
12094
}
12095
elf_end(elf);
12096
12097
out:
12098
zip_archive_close(archive);
12099
return ret;
12100
}
12101
12102
static const char *arch_specific_lib_paths(void)
12103
{
12104
/*
12105
* Based on https://packages.debian.org/sid/libc6.
12106
*
12107
* Assume that the traced program is built for the same architecture
12108
* as libbpf, which should cover the vast majority of cases.
12109
*/
12110
#if defined(__x86_64__)
12111
return "/lib/x86_64-linux-gnu";
12112
#elif defined(__i386__)
12113
return "/lib/i386-linux-gnu";
12114
#elif defined(__s390x__)
12115
return "/lib/s390x-linux-gnu";
12116
#elif defined(__s390__)
12117
return "/lib/s390-linux-gnu";
12118
#elif defined(__arm__) && defined(__SOFTFP__)
12119
return "/lib/arm-linux-gnueabi";
12120
#elif defined(__arm__) && !defined(__SOFTFP__)
12121
return "/lib/arm-linux-gnueabihf";
12122
#elif defined(__aarch64__)
12123
return "/lib/aarch64-linux-gnu";
12124
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
12125
return "/lib/mips64el-linux-gnuabi64";
12126
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
12127
return "/lib/mipsel-linux-gnu";
12128
#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
12129
return "/lib/powerpc64le-linux-gnu";
12130
#elif defined(__sparc__) && defined(__arch64__)
12131
return "/lib/sparc64-linux-gnu";
12132
#elif defined(__riscv) && __riscv_xlen == 64
12133
return "/lib/riscv64-linux-gnu";
12134
#else
12135
return NULL;
12136
#endif
12137
}
12138
12139
/* Get full path to program/shared library. */
12140
static int resolve_full_path(const char *file, char *result, size_t result_sz)
12141
{
12142
const char *search_paths[3] = {};
12143
int i, perm;
12144
12145
if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
12146
search_paths[0] = getenv("LD_LIBRARY_PATH");
12147
search_paths[1] = "/usr/lib64:/usr/lib";
12148
search_paths[2] = arch_specific_lib_paths();
12149
perm = R_OK;
12150
} else {
12151
search_paths[0] = getenv("PATH");
12152
search_paths[1] = "/usr/bin:/usr/sbin";
12153
perm = R_OK | X_OK;
12154
}
12155
12156
for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
12157
const char *s;
12158
12159
if (!search_paths[i])
12160
continue;
12161
for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
12162
char *next_path;
12163
int seg_len;
12164
12165
if (s[0] == ':')
12166
s++;
12167
next_path = strchr(s, ':');
12168
seg_len = next_path ? next_path - s : strlen(s);
12169
if (!seg_len)
12170
continue;
12171
snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
12172
/* ensure it has required permissions */
12173
if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
12174
continue;
12175
pr_debug("resolved '%s' to '%s'\n", file, result);
12176
return 0;
12177
}
12178
}
12179
return -ENOENT;
12180
}
12181
12182
struct bpf_link *
12183
bpf_program__attach_uprobe_multi(const struct bpf_program *prog,
12184
pid_t pid,
12185
const char *path,
12186
const char *func_pattern,
12187
const struct bpf_uprobe_multi_opts *opts)
12188
{
12189
const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL;
12190
LIBBPF_OPTS(bpf_link_create_opts, lopts);
12191
unsigned long *resolved_offsets = NULL;
12192
enum bpf_attach_type attach_type;
12193
int err = 0, link_fd, prog_fd;
12194
struct bpf_link *link = NULL;
12195
char full_path[PATH_MAX];
12196
bool retprobe, session;
12197
const __u64 *cookies;
12198
const char **syms;
12199
size_t cnt;
12200
12201
if (!OPTS_VALID(opts, bpf_uprobe_multi_opts))
12202
return libbpf_err_ptr(-EINVAL);
12203
12204
prog_fd = bpf_program__fd(prog);
12205
if (prog_fd < 0) {
12206
pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
12207
prog->name);
12208
return libbpf_err_ptr(-EINVAL);
12209
}
12210
12211
syms = OPTS_GET(opts, syms, NULL);
12212
offsets = OPTS_GET(opts, offsets, NULL);
12213
ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL);
12214
cookies = OPTS_GET(opts, cookies, NULL);
12215
cnt = OPTS_GET(opts, cnt, 0);
12216
retprobe = OPTS_GET(opts, retprobe, false);
12217
session = OPTS_GET(opts, session, false);
12218
12219
/*
12220
* User can specify 2 mutually exclusive sets of inputs:
12221
*
12222
* 1) use only path/func_pattern/pid arguments
12223
*
12224
* 2) use path/pid with allowed combinations of:
12225
* syms/offsets/ref_ctr_offsets/cookies/cnt
12226
*
12227
* - syms and offsets are mutually exclusive
12228
* - ref_ctr_offsets and cookies are optional
12229
*
12230
* Any other usage results in error.
12231
*/
12232
12233
if (!path)
12234
return libbpf_err_ptr(-EINVAL);
12235
if (!func_pattern && cnt == 0)
12236
return libbpf_err_ptr(-EINVAL);
12237
12238
if (func_pattern) {
12239
if (syms || offsets || ref_ctr_offsets || cookies || cnt)
12240
return libbpf_err_ptr(-EINVAL);
12241
} else {
12242
if (!!syms == !!offsets)
12243
return libbpf_err_ptr(-EINVAL);
12244
}
12245
12246
if (retprobe && session)
12247
return libbpf_err_ptr(-EINVAL);
12248
12249
if (func_pattern) {
12250
if (!strchr(path, '/')) {
12251
err = resolve_full_path(path, full_path, sizeof(full_path));
12252
if (err) {
12253
pr_warn("prog '%s': failed to resolve full path for '%s': %s\n",
12254
prog->name, path, errstr(err));
12255
return libbpf_err_ptr(err);
12256
}
12257
path = full_path;
12258
}
12259
12260
err = elf_resolve_pattern_offsets(path, func_pattern,
12261
&resolved_offsets, &cnt);
12262
if (err < 0)
12263
return libbpf_err_ptr(err);
12264
offsets = resolved_offsets;
12265
} else if (syms) {
12266
err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC);
12267
if (err < 0)
12268
return libbpf_err_ptr(err);
12269
offsets = resolved_offsets;
12270
}
12271
12272
attach_type = session ? BPF_TRACE_UPROBE_SESSION : BPF_TRACE_UPROBE_MULTI;
12273
12274
lopts.uprobe_multi.path = path;
12275
lopts.uprobe_multi.offsets = offsets;
12276
lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets;
12277
lopts.uprobe_multi.cookies = cookies;
12278
lopts.uprobe_multi.cnt = cnt;
12279
lopts.uprobe_multi.flags = retprobe ? BPF_F_UPROBE_MULTI_RETURN : 0;
12280
12281
if (pid == 0)
12282
pid = getpid();
12283
if (pid > 0)
12284
lopts.uprobe_multi.pid = pid;
12285
12286
link = calloc(1, sizeof(*link));
12287
if (!link) {
12288
err = -ENOMEM;
12289
goto error;
12290
}
12291
link->detach = &bpf_link__detach_fd;
12292
12293
link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts);
12294
if (link_fd < 0) {
12295
err = -errno;
12296
pr_warn("prog '%s': failed to attach multi-uprobe: %s\n",
12297
prog->name, errstr(err));
12298
goto error;
12299
}
12300
link->fd = link_fd;
12301
free(resolved_offsets);
12302
return link;
12303
12304
error:
12305
free(resolved_offsets);
12306
free(link);
12307
return libbpf_err_ptr(err);
12308
}
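/*
 * Example usage (illustrative sketch, not part of libbpf itself): attach a
 * single multi-uprobe link to a set of named functions in a shared library;
 * the library path and symbol names are example values.
 */
static inline struct bpf_link *example_attach_uprobe_multi(struct bpf_program *prog)
{
	const char *syms[] = { "malloc", "free" };
	LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
		.syms = syms,
		.cnt = ARRAY_SIZE(syms),
	);

	return bpf_program__attach_uprobe_multi(prog, -1 /* any process */,
						"/usr/lib/x86_64-linux-gnu/libc.so.6",
						NULL /* no pattern */, &opts);
}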
12309
12310
LIBBPF_API struct bpf_link *
12311
bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
12312
const char *binary_path, size_t func_offset,
12313
const struct bpf_uprobe_opts *opts)
12314
{
12315
const char *archive_path = NULL, *archive_sep = NULL;
12316
char *legacy_probe = NULL;
12317
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
12318
enum probe_attach_mode attach_mode;
12319
char full_path[PATH_MAX];
12320
struct bpf_link *link;
12321
size_t ref_ctr_off;
12322
int pfd, err;
12323
bool retprobe, legacy;
12324
const char *func_name;
12325
12326
if (!OPTS_VALID(opts, bpf_uprobe_opts))
12327
return libbpf_err_ptr(-EINVAL);
12328
12329
attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
12330
retprobe = OPTS_GET(opts, retprobe, false);
12331
ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
12332
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
12333
12334
if (!binary_path)
12335
return libbpf_err_ptr(-EINVAL);
12336
12337
/* Check if "binary_path" refers to an archive. */
12338
archive_sep = strstr(binary_path, "!/");
12339
if (archive_sep) {
12340
full_path[0] = '\0';
12341
libbpf_strlcpy(full_path, binary_path,
12342
min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
12343
archive_path = full_path;
12344
binary_path = archive_sep + 2;
12345
} else if (!strchr(binary_path, '/')) {
12346
err = resolve_full_path(binary_path, full_path, sizeof(full_path));
12347
if (err) {
12348
pr_warn("prog '%s': failed to resolve full path for '%s': %s\n",
12349
prog->name, binary_path, errstr(err));
12350
return libbpf_err_ptr(err);
12351
}
12352
binary_path = full_path;
12353
}
12354
func_name = OPTS_GET(opts, func_name, NULL);
12355
if (func_name) {
12356
long sym_off;
12357
12358
if (archive_path) {
12359
sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
12360
func_name);
12361
binary_path = archive_path;
12362
} else {
12363
sym_off = elf_find_func_offset_from_file(binary_path, func_name);
12364
}
12365
if (sym_off < 0)
12366
return libbpf_err_ptr(sym_off);
12367
func_offset += sym_off;
12368
}
12369
12370
legacy = determine_uprobe_perf_type() < 0;
12371
switch (attach_mode) {
12372
case PROBE_ATTACH_MODE_LEGACY:
12373
legacy = true;
12374
pe_opts.force_ioctl_attach = true;
12375
break;
12376
case PROBE_ATTACH_MODE_PERF:
12377
if (legacy)
12378
return libbpf_err_ptr(-ENOTSUP);
12379
pe_opts.force_ioctl_attach = true;
12380
break;
12381
case PROBE_ATTACH_MODE_LINK:
12382
if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
12383
return libbpf_err_ptr(-ENOTSUP);
12384
break;
12385
case PROBE_ATTACH_MODE_DEFAULT:
12386
break;
12387
default:
12388
return libbpf_err_ptr(-EINVAL);
12389
}
12390
12391
if (!legacy) {
12392
pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
12393
func_offset, pid, ref_ctr_off);
12394
} else {
12395
char probe_name[MAX_EVENT_NAME_LEN];
12396
12397
if (ref_ctr_off)
12398
return libbpf_err_ptr(-EINVAL);
12399
12400
gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
12401
strrchr(binary_path, '/') ? : binary_path,
12402
func_offset);
12403
12404
legacy_probe = strdup(probe_name);
12405
if (!legacy_probe)
12406
return libbpf_err_ptr(-ENOMEM);
12407
12408
pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
12409
binary_path, func_offset, pid);
12410
}
12411
if (pfd < 0) {
12412
err = -errno;
12413
pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
12414
prog->name, retprobe ? "uretprobe" : "uprobe",
12415
binary_path, func_offset,
12416
errstr(err));
12417
goto err_out;
12418
}
12419
12420
link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12421
err = libbpf_get_error(link);
12422
if (err) {
12423
close(pfd);
12424
pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
12425
prog->name, retprobe ? "uretprobe" : "uprobe",
12426
binary_path, func_offset,
12427
errstr(err));
12428
goto err_clean_legacy;
12429
}
12430
if (legacy) {
12431
struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
12432
12433
perf_link->legacy_probe_name = legacy_probe;
12434
perf_link->legacy_is_kprobe = false;
12435
perf_link->legacy_is_retprobe = retprobe;
12436
}
12437
return link;
12438
12439
err_clean_legacy:
12440
if (legacy)
12441
remove_uprobe_event_legacy(legacy_probe, retprobe);
12442
err_out:
12443
free(legacy_probe);
12444
return libbpf_err_ptr(err);
12445
}
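/*
 * Example usage (illustrative sketch, not part of libbpf itself): attach a
 * uprobe to a library function by name, letting libbpf resolve the symbol
 * offset from the ELF file; since "libc.so.6" contains no '/', it is resolved
 * to a full path via the standard library search paths. The library and
 * function names are example values.
 */
static inline struct bpf_link *example_attach_uprobe_by_name(struct bpf_program *prog)
{
	LIBBPF_OPTS(bpf_uprobe_opts, opts, .func_name = "malloc");

	return bpf_program__attach_uprobe_opts(prog, -1 /* any process */,
					       "libc.so.6", 0 /* func_offset */,
					       &opts);
}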
12446
12447
/* Format of u[ret]probe section definition supporting auto-attach:
12448
* u[ret]probe/binary:function[+offset]
12449
*
12450
* binary can be an absolute/relative path or a filename; the latter is resolved to a
12451
* full binary path via bpf_program__attach_uprobe_opts.
12452
*
12453
* Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
12454
* specified (and auto-attach is not possible) or the above format is specified for
12455
* auto-attach.
12456
*/
12457
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12458
{
12459
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
12460
char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
12461
int n, c, ret = -EINVAL;
12462
long offset = 0;
12463
12464
*link = NULL;
12465
12466
n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
12467
&probe_type, &binary_path, &func_name);
12468
switch (n) {
12469
case 1:
12470
/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
12471
ret = 0;
12472
break;
12473
case 2:
12474
pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
12475
prog->name, prog->sec_name);
12476
break;
12477
case 3:
12478
/* check if user specified `+offset`; if so, it must be
12479
* the last part of the string, so make sure sscanf read to EOL
12480
*/
12481
func_off = strrchr(func_name, '+');
12482
if (func_off) {
12483
n = sscanf(func_off, "+%li%n", &offset, &c);
12484
if (n == 1 && *(func_off + c) == '\0')
12485
func_off[0] = '\0';
12486
else
12487
offset = 0;
12488
}
12489
opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
12490
strcmp(probe_type, "uretprobe.s") == 0;
12491
if (opts.retprobe && offset != 0) {
12492
pr_warn("prog '%s': uretprobes do not support offset specification\n",
12493
prog->name);
12494
break;
12495
}
12496
opts.func_name = func_name;
12497
*link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
12498
ret = libbpf_get_error(*link);
12499
break;
12500
default:
12501
pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
12502
prog->sec_name);
12503
break;
12504
}
12505
free(probe_type);
12506
free(binary_path);
12507
free(func_name);
12508
12509
return ret;
12510
}
12511
12512
struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
12513
bool retprobe, pid_t pid,
12514
const char *binary_path,
12515
size_t func_offset)
12516
{
12517
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
12518
12519
return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
12520
}
12521
12522
struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
12523
pid_t pid, const char *binary_path,
12524
const char *usdt_provider, const char *usdt_name,
12525
const struct bpf_usdt_opts *opts)
12526
{
12527
char resolved_path[512];
12528
struct bpf_object *obj = prog->obj;
12529
struct bpf_link *link;
12530
__u64 usdt_cookie;
12531
int err;
12532
12533
if (!OPTS_VALID(opts, bpf_uprobe_opts))
12534
return libbpf_err_ptr(-EINVAL);
12535
12536
if (bpf_program__fd(prog) < 0) {
12537
pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
12538
prog->name);
12539
return libbpf_err_ptr(-EINVAL);
12540
}
12541
12542
if (!binary_path)
12543
return libbpf_err_ptr(-EINVAL);
12544
12545
if (!strchr(binary_path, '/')) {
12546
err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
12547
if (err) {
12548
pr_warn("prog '%s': failed to resolve full path for '%s': %s\n",
12549
prog->name, binary_path, errstr(err));
12550
return libbpf_err_ptr(err);
12551
}
12552
binary_path = resolved_path;
12553
}
12554
12555
/* USDT manager is instantiated lazily on first USDT attach. It will
12556
* be destroyed together with BPF object in bpf_object__close().
12557
*/
12558
if (IS_ERR(obj->usdt_man))
12559
return libbpf_ptr(obj->usdt_man);
12560
if (!obj->usdt_man) {
12561
obj->usdt_man = usdt_manager_new(obj);
12562
if (IS_ERR(obj->usdt_man))
12563
return libbpf_ptr(obj->usdt_man);
12564
}
12565
12566
usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
12567
link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
12568
usdt_provider, usdt_name, usdt_cookie);
12569
err = libbpf_get_error(link);
12570
if (err)
12571
return libbpf_err_ptr(err);
12572
return link;
12573
}
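/*
 * Example usage (illustrative sketch, not part of libbpf itself): attach a
 * SEC("usdt") program to a USDT probe; the binary path, provider and probe
 * names are hypothetical example values.
 */
static inline struct bpf_link *example_attach_usdt(struct bpf_program *prog)
{
	LIBBPF_OPTS(bpf_usdt_opts, opts, .usdt_cookie = 0xcafe);

	return bpf_program__attach_usdt(prog, -1 /* any process */,
					"/usr/bin/myapp", "myprovider", "myprobe",
					&opts);
}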
12574
12575
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12576
{
12577
char *path = NULL, *provider = NULL, *name = NULL;
12578
const char *sec_name;
12579
int n, err;
12580
12581
sec_name = bpf_program__section_name(prog);
12582
if (strcmp(sec_name, "usdt") == 0) {
12583
/* no auto-attach for just SEC("usdt") */
12584
*link = NULL;
12585
return 0;
12586
}
12587
12588
n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
12589
if (n != 3) {
12590
pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
12591
sec_name);
12592
err = -EINVAL;
12593
} else {
12594
*link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
12595
provider, name, NULL);
12596
err = libbpf_get_error(*link);
12597
}
12598
free(path);
12599
free(provider);
12600
free(name);
12601
return err;
12602
}
12603
12604
static int determine_tracepoint_id(const char *tp_category,
12605
const char *tp_name)
12606
{
12607
char file[PATH_MAX];
12608
int ret;
12609
12610
ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
12611
tracefs_path(), tp_category, tp_name);
12612
if (ret < 0)
12613
return -errno;
12614
if (ret >= sizeof(file)) {
12615
pr_debug("tracepoint %s/%s path is too long\n",
12616
tp_category, tp_name);
12617
return -E2BIG;
12618
}
12619
return parse_uint_from_file(file, "%d\n");
12620
}
12621
12622
static int perf_event_open_tracepoint(const char *tp_category,
12623
const char *tp_name)
12624
{
12625
const size_t attr_sz = sizeof(struct perf_event_attr);
12626
struct perf_event_attr attr;
12627
int tp_id, pfd, err;
12628
12629
tp_id = determine_tracepoint_id(tp_category, tp_name);
12630
if (tp_id < 0) {
12631
pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
12632
tp_category, tp_name,
12633
errstr(tp_id));
12634
return tp_id;
12635
}
12636
12637
memset(&attr, 0, attr_sz);
12638
attr.type = PERF_TYPE_TRACEPOINT;
12639
attr.size = attr_sz;
12640
attr.config = tp_id;
12641
12642
pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
12643
-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
12644
if (pfd < 0) {
12645
err = -errno;
12646
pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
12647
tp_category, tp_name,
12648
errstr(err));
12649
return err;
12650
}
12651
return pfd;
12652
}
12653
12654
struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
12655
const char *tp_category,
12656
const char *tp_name,
12657
const struct bpf_tracepoint_opts *opts)
12658
{
12659
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
12660
struct bpf_link *link;
12661
int pfd, err;
12662
12663
if (!OPTS_VALID(opts, bpf_tracepoint_opts))
12664
return libbpf_err_ptr(-EINVAL);
12665
12666
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
12667
12668
pfd = perf_event_open_tracepoint(tp_category, tp_name);
12669
if (pfd < 0) {
12670
pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
12671
prog->name, tp_category, tp_name,
12672
errstr(pfd));
12673
return libbpf_err_ptr(pfd);
12674
}
12675
link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12676
err = libbpf_get_error(link);
12677
if (err) {
12678
close(pfd);
12679
pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
12680
prog->name, tp_category, tp_name,
12681
errstr(err));
12682
return libbpf_err_ptr(err);
12683
}
12684
return link;
12685
}
12686
12687
struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
12688
const char *tp_category,
12689
const char *tp_name)
12690
{
12691
return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
12692
}
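/*
 * Example usage (illustrative sketch, not part of libbpf itself): attach a
 * tracepoint program to the sched/sched_switch tracepoint, used here only as
 * a common example.
 */
static inline struct bpf_link *example_attach_sched_switch_tp(struct bpf_program *prog)
{
	return bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
}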
12693
12694
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12695
{
12696
char *sec_name, *tp_cat, *tp_name;
12697
12698
*link = NULL;
12699
12700
/* no auto-attach for SEC("tp") or SEC("tracepoint") */
12701
if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
12702
return 0;
12703
12704
sec_name = strdup(prog->sec_name);
12705
if (!sec_name)
12706
return -ENOMEM;
12707
12708
/* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
12709
if (str_has_pfx(prog->sec_name, "tp/"))
12710
tp_cat = sec_name + sizeof("tp/") - 1;
12711
else
12712
tp_cat = sec_name + sizeof("tracepoint/") - 1;
12713
tp_name = strchr(tp_cat, '/');
12714
if (!tp_name) {
12715
free(sec_name);
12716
return -EINVAL;
12717
}
12718
*tp_name = '\0';
12719
tp_name++;
12720
12721
*link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
12722
free(sec_name);
12723
return libbpf_get_error(*link);
12724
}
12725
12726
struct bpf_link *
12727
bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog,
12728
const char *tp_name,
12729
struct bpf_raw_tracepoint_opts *opts)
12730
{
12731
LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts);
12732
struct bpf_link *link;
12733
int prog_fd, pfd;
12734
12735
if (!OPTS_VALID(opts, bpf_raw_tracepoint_opts))
12736
return libbpf_err_ptr(-EINVAL);
12737
12738
prog_fd = bpf_program__fd(prog);
12739
if (prog_fd < 0) {
12740
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12741
return libbpf_err_ptr(-EINVAL);
12742
}
12743
12744
link = calloc(1, sizeof(*link));
12745
if (!link)
12746
return libbpf_err_ptr(-ENOMEM);
12747
link->detach = &bpf_link__detach_fd;
12748
12749
raw_opts.tp_name = tp_name;
12750
raw_opts.cookie = OPTS_GET(opts, cookie, 0);
12751
pfd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_opts);
12752
if (pfd < 0) {
12753
pfd = -errno;
12754
free(link);
12755
pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
12756
prog->name, tp_name, errstr(pfd));
12757
return libbpf_err_ptr(pfd);
12758
}
12759
link->fd = pfd;
12760
return link;
12761
}
12762
12763
struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
12764
const char *tp_name)
12765
{
12766
return bpf_program__attach_raw_tracepoint_opts(prog, tp_name, NULL);
12767
}
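/*
 * Example usage (illustrative sketch, not part of libbpf itself): attach a
 * SEC("raw_tp") program to the sched_switch raw tracepoint, used here only
 * as a common example.
 */
static inline struct bpf_link *example_attach_raw_tp(struct bpf_program *prog)
{
	return bpf_program__attach_raw_tracepoint(prog, "sched_switch");
}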
12768
12769
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12770
{
12771
static const char *const prefixes[] = {
12772
"raw_tp",
12773
"raw_tracepoint",
12774
"raw_tp.w",
12775
"raw_tracepoint.w",
12776
};
12777
size_t i;
12778
const char *tp_name = NULL;
12779
12780
*link = NULL;
12781
12782
for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
12783
size_t pfx_len;
12784
12785
if (!str_has_pfx(prog->sec_name, prefixes[i]))
12786
continue;
12787
12788
pfx_len = strlen(prefixes[i]);
12789
/* no auto-attach case, e.g., plain SEC("raw_tp") */
12790
if (prog->sec_name[pfx_len] == '\0')
12791
return 0;
12792
12793
if (prog->sec_name[pfx_len] != '/')
12794
continue;
12795
12796
tp_name = prog->sec_name + pfx_len + 1;
12797
break;
12798
}
12799
12800
if (!tp_name) {
12801
pr_warn("prog '%s': invalid section name '%s'\n",
12802
prog->name, prog->sec_name);
12803
return -EINVAL;
12804
}
12805
12806
*link = bpf_program__attach_raw_tracepoint(prog, tp_name);
12807
return libbpf_get_error(*link);
12808
}
12809
12810
/* Common logic for all BPF program types that attach to a btf_id */
12811
static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
12812
const struct bpf_trace_opts *opts)
12813
{
12814
LIBBPF_OPTS(bpf_link_create_opts, link_opts);
12815
struct bpf_link *link;
12816
int prog_fd, pfd;
12817
12818
if (!OPTS_VALID(opts, bpf_trace_opts))
12819
return libbpf_err_ptr(-EINVAL);
12820
12821
prog_fd = bpf_program__fd(prog);
12822
if (prog_fd < 0) {
12823
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12824
return libbpf_err_ptr(-EINVAL);
12825
}
12826
12827
link = calloc(1, sizeof(*link));
12828
if (!link)
12829
return libbpf_err_ptr(-ENOMEM);
12830
link->detach = &bpf_link__detach_fd;
12831
12832
/* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
12833
link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
12834
pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
12835
if (pfd < 0) {
12836
pfd = -errno;
12837
free(link);
12838
pr_warn("prog '%s': failed to attach: %s\n",
12839
prog->name, errstr(pfd));
12840
return libbpf_err_ptr(pfd);
12841
}
12842
link->fd = pfd;
12843
return link;
12844
}
12845
12846
struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
12847
{
12848
return bpf_program__attach_btf_id(prog, NULL);
12849
}
12850
12851
struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
12852
const struct bpf_trace_opts *opts)
12853
{
12854
return bpf_program__attach_btf_id(prog, opts);
12855
}
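/*
 * Illustrative sketch of attaching a trace (fentry/fexit) program with a BPF
 * cookie; the cookie value and variable names are placeholders:
 *
 *	LIBBPF_OPTS(bpf_trace_opts, topts, .cookie = 0x1234);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_trace_opts(prog, &topts);
 *	if (!link)
 *		return -errno;
 */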
12856
12857
struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
12858
{
12859
return bpf_program__attach_btf_id(prog, NULL);
12860
}
12861
12862
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12863
{
12864
*link = bpf_program__attach_trace(prog);
12865
return libbpf_get_error(*link);
12866
}
12867
12868
static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12869
{
12870
*link = bpf_program__attach_lsm(prog);
12871
return libbpf_get_error(*link);
12872
}
12873
12874
static struct bpf_link *
12875
bpf_program_attach_fd(const struct bpf_program *prog,
12876
int target_fd, const char *target_name,
12877
const struct bpf_link_create_opts *opts)
12878
{
12879
enum bpf_attach_type attach_type;
12880
struct bpf_link *link;
12881
int prog_fd, link_fd;
12882
12883
prog_fd = bpf_program__fd(prog);
12884
if (prog_fd < 0) {
12885
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12886
return libbpf_err_ptr(-EINVAL);
12887
}
12888
12889
link = calloc(1, sizeof(*link));
12890
if (!link)
12891
return libbpf_err_ptr(-ENOMEM);
12892
link->detach = &bpf_link__detach_fd;
12893
12894
attach_type = bpf_program__expected_attach_type(prog);
12895
link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
12896
if (link_fd < 0) {
12897
link_fd = -errno;
12898
free(link);
12899
pr_warn("prog '%s': failed to attach to %s: %s\n",
12900
prog->name, target_name,
12901
errstr(link_fd));
12902
return libbpf_err_ptr(link_fd);
12903
}
12904
link->fd = link_fd;
12905
return link;
12906
}
12907
12908
struct bpf_link *
12909
bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
12910
{
12911
return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
12912
}
12913
12914
struct bpf_link *
12915
bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
12916
{
12917
return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
12918
}
12919
12920
struct bpf_link *
12921
bpf_program__attach_sockmap(const struct bpf_program *prog, int map_fd)
12922
{
12923
return bpf_program_attach_fd(prog, map_fd, "sockmap", NULL);
12924
}
12925
12926
struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
12927
{
12928
/* target_fd/target_ifindex use the same field in LINK_CREATE */
12929
return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
12930
}
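/*
 * Illustrative sketch; the interface name is a placeholder. The target
 * netdevice is identified by ifindex, e.g. resolved with if_nametoindex()
 * from <net/if.h>:
 *
 *	int ifindex = if_nametoindex("eth0");
 *	struct bpf_link *link;
 *
 *	if (!ifindex)
 *		return -errno;
 *	link = bpf_program__attach_xdp(prog, ifindex);
 *	if (!link)
 *		return -errno;
 */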
12931
12932
struct bpf_link *
12933
bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd,
12934
const struct bpf_cgroup_opts *opts)
12935
{
12936
LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12937
__u32 relative_id;
12938
int relative_fd;
12939
12940
if (!OPTS_VALID(opts, bpf_cgroup_opts))
12941
return libbpf_err_ptr(-EINVAL);
12942
12943
relative_id = OPTS_GET(opts, relative_id, 0);
12944
relative_fd = OPTS_GET(opts, relative_fd, 0);
12945
12946
if (relative_fd && relative_id) {
12947
pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12948
prog->name);
12949
return libbpf_err_ptr(-EINVAL);
12950
}
12951
12952
link_create_opts.cgroup.expected_revision = OPTS_GET(opts, expected_revision, 0);
12953
link_create_opts.cgroup.relative_fd = relative_fd;
12954
link_create_opts.cgroup.relative_id = relative_id;
12955
link_create_opts.flags = OPTS_GET(opts, flags, 0);
12956
12957
return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", &link_create_opts);
12958
}
12959
12960
struct bpf_link *
12961
bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
12962
const struct bpf_tcx_opts *opts)
12963
{
12964
LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12965
__u32 relative_id;
12966
int relative_fd;
12967
12968
if (!OPTS_VALID(opts, bpf_tcx_opts))
12969
return libbpf_err_ptr(-EINVAL);
12970
12971
relative_id = OPTS_GET(opts, relative_id, 0);
12972
relative_fd = OPTS_GET(opts, relative_fd, 0);
12973
12974
/* validate we don't have unexpected combinations of non-zero fields */
12975
if (!ifindex) {
12976
pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12977
prog->name);
12978
return libbpf_err_ptr(-EINVAL);
12979
}
12980
if (relative_fd && relative_id) {
12981
pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12982
prog->name);
12983
return libbpf_err_ptr(-EINVAL);
12984
}
12985
12986
link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
12987
link_create_opts.tcx.relative_fd = relative_fd;
12988
link_create_opts.tcx.relative_id = relative_id;
12989
link_create_opts.flags = OPTS_GET(opts, flags, 0);
12990
12991
/* target_fd/target_ifindex use the same field in LINK_CREATE */
12992
return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
12993
}
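/*
 * Illustrative sketch of attaching a SEC("tcx/ingress") program with default
 * options (no explicit ordering relative to other mprog entries); ifindex and
 * variable names are placeholders:
 *
 *	LIBBPF_OPTS(bpf_tcx_opts, topts);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tcx(prog, ifindex, &topts);
 *	if (!link)
 *		return -errno;
 */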
12994
12995
struct bpf_link *
12996
bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
12997
const struct bpf_netkit_opts *opts)
12998
{
12999
LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
13000
__u32 relative_id;
13001
int relative_fd;
13002
13003
if (!OPTS_VALID(opts, bpf_netkit_opts))
13004
return libbpf_err_ptr(-EINVAL);
13005
13006
relative_id = OPTS_GET(opts, relative_id, 0);
13007
relative_fd = OPTS_GET(opts, relative_fd, 0);
13008
13009
/* validate we don't have unexpected combinations of non-zero fields */
13010
if (!ifindex) {
13011
pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
13012
prog->name);
13013
return libbpf_err_ptr(-EINVAL);
13014
}
13015
if (relative_fd && relative_id) {
13016
pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
13017
prog->name);
13018
return libbpf_err_ptr(-EINVAL);
13019
}
13020
13021
link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0);
13022
link_create_opts.netkit.relative_fd = relative_fd;
13023
link_create_opts.netkit.relative_id = relative_id;
13024
link_create_opts.flags = OPTS_GET(opts, flags, 0);
13025
13026
return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts);
13027
}
13028
13029
struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
13030
int target_fd,
13031
const char *attach_func_name)
13032
{
13033
int btf_id;
13034
13035
if (!!target_fd != !!attach_func_name) {
13036
pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
13037
prog->name);
13038
return libbpf_err_ptr(-EINVAL);
13039
}
13040
13041
if (prog->type != BPF_PROG_TYPE_EXT) {
13042
pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
13043
prog->name);
13044
return libbpf_err_ptr(-EINVAL);
13045
}
13046
13047
if (target_fd) {
13048
LIBBPF_OPTS(bpf_link_create_opts, target_opts);
13049
13050
btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd);
13051
if (btf_id < 0)
13052
return libbpf_err_ptr(btf_id);
13053
13054
target_opts.target_btf_id = btf_id;
13055
13056
return bpf_program_attach_fd(prog, target_fd, "freplace",
13057
&target_opts);
13058
} else {
13059
/* no target, so use raw_tracepoint_open for compatibility
13060
* with old kernels
13061
*/
13062
return bpf_program__attach_trace(prog);
13063
}
13064
}
13065
13066
struct bpf_link *
13067
bpf_program__attach_iter(const struct bpf_program *prog,
13068
const struct bpf_iter_attach_opts *opts)
13069
{
13070
DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
13071
struct bpf_link *link;
13072
int prog_fd, link_fd;
13073
__u32 target_fd = 0;
13074
13075
if (!OPTS_VALID(opts, bpf_iter_attach_opts))
13076
return libbpf_err_ptr(-EINVAL);
13077
13078
link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
13079
link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
13080
13081
prog_fd = bpf_program__fd(prog);
13082
if (prog_fd < 0) {
13083
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
13084
return libbpf_err_ptr(-EINVAL);
13085
}
13086
13087
link = calloc(1, sizeof(*link));
13088
if (!link)
13089
return libbpf_err_ptr(-ENOMEM);
13090
link->detach = &bpf_link__detach_fd;
13091
13092
link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
13093
&link_create_opts);
13094
if (link_fd < 0) {
13095
link_fd = -errno;
13096
free(link);
13097
pr_warn("prog '%s': failed to attach to iterator: %s\n",
13098
prog->name, errstr(link_fd));
13099
return libbpf_err_ptr(link_fd);
13100
}
13101
link->fd = link_fd;
13102
return link;
13103
}
13104
13105
static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
13106
{
13107
*link = bpf_program__attach_iter(prog, NULL);
13108
return libbpf_get_error(*link);
13109
}
13110
13111
struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
13112
const struct bpf_netfilter_opts *opts)
13113
{
13114
LIBBPF_OPTS(bpf_link_create_opts, lopts);
13115
struct bpf_link *link;
13116
int prog_fd, link_fd;
13117
13118
if (!OPTS_VALID(opts, bpf_netfilter_opts))
13119
return libbpf_err_ptr(-EINVAL);
13120
13121
prog_fd = bpf_program__fd(prog);
13122
if (prog_fd < 0) {
13123
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
13124
return libbpf_err_ptr(-EINVAL);
13125
}
13126
13127
link = calloc(1, sizeof(*link));
13128
if (!link)
13129
return libbpf_err_ptr(-ENOMEM);
13130
13131
link->detach = &bpf_link__detach_fd;
13132
13133
lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
13134
lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
13135
lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
13136
lopts.netfilter.flags = OPTS_GET(opts, flags, 0);
13137
13138
link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
13139
if (link_fd < 0) {
13140
link_fd = -errno;
13141
free(link);
13142
pr_warn("prog '%s': failed to attach to netfilter: %s\n",
13143
prog->name, errstr(link_fd));
13144
return libbpf_err_ptr(link_fd);
13145
}
13146
link->fd = link_fd;
13147
13148
return link;
13149
}
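/*
 * Illustrative sketch; the IPv4 LOCAL_IN hook and priority are just example
 * values (constants from <linux/netfilter.h>):
 *
 *	LIBBPF_OPTS(bpf_netfilter_opts, nf_opts,
 *		    .pf = NFPROTO_IPV4,
 *		    .hooknum = NF_INET_LOCAL_IN,
 *		    .priority = -128);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_netfilter(prog, &nf_opts);
 *	if (!link)
 *		return -errno;
 */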
13150
13151
struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
13152
{
13153
struct bpf_link *link = NULL;
13154
int err;
13155
13156
if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
13157
return libbpf_err_ptr(-EOPNOTSUPP);
13158
13159
if (bpf_program__fd(prog) < 0) {
13160
pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
13161
prog->name);
13162
return libbpf_err_ptr(-EINVAL);
13163
}
13164
13165
err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
13166
if (err)
13167
return libbpf_err_ptr(err);
13168
13169
/* When calling bpf_program__attach() explicitly, auto-attach support
13170
* is expected to work, so NULL returned link is considered an error.
13171
* This is different for skeleton's attach, see comment in
13172
* bpf_object__attach_skeleton().
13173
*/
13174
if (!link)
13175
return libbpf_err_ptr(-EOPNOTSUPP);
13176
13177
return link;
13178
}
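/*
 * Illustrative sketch: the generic attach picks the attach mechanism from the
 * program's SEC() definition; the program name is a placeholder:
 *
 *	struct bpf_program *prog;
 *	struct bpf_link *link;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *	link = bpf_program__attach(prog);
 *	if (!link)
 *		return -errno;
 */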
13179
13180
struct bpf_link_struct_ops {
13181
struct bpf_link link;
13182
int map_fd;
13183
};
13184
13185
static int bpf_link__detach_struct_ops(struct bpf_link *link)
13186
{
13187
struct bpf_link_struct_ops *st_link;
13188
__u32 zero = 0;
13189
13190
st_link = container_of(link, struct bpf_link_struct_ops, link);
13191
13192
if (st_link->map_fd < 0)
13193
/* w/o a real link */
13194
return bpf_map_delete_elem(link->fd, &zero);
13195
13196
return close(link->fd);
13197
}
13198
13199
struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
13200
{
13201
struct bpf_link_struct_ops *link;
13202
__u32 zero = 0;
13203
int err, fd;
13204
13205
if (!bpf_map__is_struct_ops(map)) {
13206
pr_warn("map '%s': can't attach non-struct_ops map\n", map->name);
13207
return libbpf_err_ptr(-EINVAL);
13208
}
13209
13210
if (map->fd < 0) {
13211
pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name);
13212
return libbpf_err_ptr(-EINVAL);
13213
}
13214
13215
link = calloc(1, sizeof(*link));
13216
if (!link)
13217
return libbpf_err_ptr(-EINVAL);
13218
13219
/* kern_vdata should be prepared during the loading phase. */
13220
err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
13221
/* It can be EBUSY if the map has been used to create or
13222
* update a link before. We don't allow updating the value of
13223
* a struct_ops once it is set. That ensures that the value
13224
* never changes. So, it is safe to skip EBUSY.
13225
*/
13226
if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
13227
free(link);
13228
return libbpf_err_ptr(err);
13229
}
13230
13231
link->link.detach = bpf_link__detach_struct_ops;
13232
13233
if (!(map->def.map_flags & BPF_F_LINK)) {
13234
/* w/o a real link */
13235
link->link.fd = map->fd;
13236
link->map_fd = -1;
13237
return &link->link;
13238
}
13239
13240
fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
13241
if (fd < 0) {
13242
free(link);
13243
return libbpf_err_ptr(fd);
13244
}
13245
13246
link->link.fd = fd;
13247
link->map_fd = map->fd;
13248
13249
return &link->link;
13250
}
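/*
 * Illustrative sketch; the map name is a placeholder. struct_ops objects are
 * attached through their map (after bpf_object__load()), not through a
 * program:
 *
 *	struct bpf_map *map;
 *	struct bpf_link *link;
 *
 *	map = bpf_object__find_map_by_name(obj, "my_ops");
 *	link = bpf_map__attach_struct_ops(map);
 *	if (!link)
 *		return -errno;
 */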
13251
13252
/*
13253
* Swap the backing struct_ops map of a link with a new struct_ops map.
13254
*/
13255
int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
13256
{
13257
struct bpf_link_struct_ops *st_ops_link;
13258
__u32 zero = 0;
13259
int err;
13260
13261
if (!bpf_map__is_struct_ops(map))
13262
return libbpf_err(-EINVAL);
13263
13264
if (map->fd < 0) {
13265
pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
13266
return libbpf_err(-EINVAL);
13267
}
13268
13269
st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
13270
/* Ensure the type of a link is correct */
13271
if (st_ops_link->map_fd < 0)
13272
return libbpf_err(-EINVAL);
13273
13274
err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
13275
/* It can be EBUSY if the map has been used to create or
13276
* update a link before. We don't allow updating the value of
13277
* a struct_ops once it is set. That ensures that the value
13278
* never changed. So, it is safe to skip EBUSY.
13279
*/
13280
if (err && err != -EBUSY)
13281
return err;
13282
13283
err = bpf_link_update(link->fd, map->fd, NULL);
13284
if (err < 0)
13285
return err;
13286
13287
st_ops_link->map_fd = map->fd;
13288
13289
return 0;
13290
}
13291
13292
typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
13293
void *private_data);
13294
13295
static enum bpf_perf_event_ret
13296
perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
13297
void **copy_mem, size_t *copy_size,
13298
bpf_perf_event_print_t fn, void *private_data)
13299
{
13300
struct perf_event_mmap_page *header = mmap_mem;
13301
__u64 data_head = ring_buffer_read_head(header);
13302
__u64 data_tail = header->data_tail;
13303
void *base = ((__u8 *)header) + page_size;
13304
int ret = LIBBPF_PERF_EVENT_CONT;
13305
struct perf_event_header *ehdr;
13306
size_t ehdr_size;
13307
13308
while (data_head != data_tail) {
13309
ehdr = base + (data_tail & (mmap_size - 1));
13310
ehdr_size = ehdr->size;
13311
13312
if (((void *)ehdr) + ehdr_size > base + mmap_size) {
13313
void *copy_start = ehdr;
13314
size_t len_first = base + mmap_size - copy_start;
13315
size_t len_secnd = ehdr_size - len_first;
13316
13317
if (*copy_size < ehdr_size) {
13318
free(*copy_mem);
13319
*copy_mem = malloc(ehdr_size);
13320
if (!*copy_mem) {
13321
*copy_size = 0;
13322
ret = LIBBPF_PERF_EVENT_ERROR;
13323
break;
13324
}
13325
*copy_size = ehdr_size;
13326
}
13327
13328
memcpy(*copy_mem, copy_start, len_first);
13329
memcpy(*copy_mem + len_first, base, len_secnd);
13330
ehdr = *copy_mem;
13331
}
13332
13333
ret = fn(ehdr, private_data);
13334
data_tail += ehdr_size;
13335
if (ret != LIBBPF_PERF_EVENT_CONT)
13336
break;
13337
}
13338
13339
ring_buffer_write_tail(header, data_tail);
13340
return libbpf_err(ret);
13341
}
13342
13343
struct perf_buffer;
13344
13345
struct perf_buffer_params {
13346
struct perf_event_attr *attr;
13347
/* if event_cb is specified, it takes precedence */
13348
perf_buffer_event_fn event_cb;
13349
/* sample_cb and lost_cb are higher-level common-case callbacks */
13350
perf_buffer_sample_fn sample_cb;
13351
perf_buffer_lost_fn lost_cb;
13352
void *ctx;
13353
int cpu_cnt;
13354
int *cpus;
13355
int *map_keys;
13356
};
13357
13358
struct perf_cpu_buf {
13359
struct perf_buffer *pb;
13360
void *base; /* mmap()'ed memory */
13361
void *buf; /* for reconstructing segmented data */
13362
size_t buf_size;
13363
int fd;
13364
int cpu;
13365
int map_key;
13366
};
13367
13368
struct perf_buffer {
13369
perf_buffer_event_fn event_cb;
13370
perf_buffer_sample_fn sample_cb;
13371
perf_buffer_lost_fn lost_cb;
13372
void *ctx; /* passed into callbacks */
13373
13374
size_t page_size;
13375
size_t mmap_size;
13376
struct perf_cpu_buf **cpu_bufs;
13377
struct epoll_event *events;
13378
int cpu_cnt; /* number of allocated CPU buffers */
13379
int epoll_fd; /* perf event FD */
13380
int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
13381
};
13382
13383
static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
13384
struct perf_cpu_buf *cpu_buf)
13385
{
13386
if (!cpu_buf)
13387
return;
13388
if (cpu_buf->base &&
13389
munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
13390
pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
13391
if (cpu_buf->fd >= 0) {
13392
ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
13393
close(cpu_buf->fd);
13394
}
13395
free(cpu_buf->buf);
13396
free(cpu_buf);
13397
}
13398
13399
void perf_buffer__free(struct perf_buffer *pb)
13400
{
13401
int i;
13402
13403
if (IS_ERR_OR_NULL(pb))
13404
return;
13405
if (pb->cpu_bufs) {
13406
for (i = 0; i < pb->cpu_cnt; i++) {
13407
struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
13408
13409
if (!cpu_buf)
13410
continue;
13411
13412
bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
13413
perf_buffer__free_cpu_buf(pb, cpu_buf);
13414
}
13415
free(pb->cpu_bufs);
13416
}
13417
if (pb->epoll_fd >= 0)
13418
close(pb->epoll_fd);
13419
free(pb->events);
13420
free(pb);
13421
}
13422
13423
static struct perf_cpu_buf *
13424
perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
13425
int cpu, int map_key)
13426
{
13427
struct perf_cpu_buf *cpu_buf;
13428
int err;
13429
13430
cpu_buf = calloc(1, sizeof(*cpu_buf));
13431
if (!cpu_buf)
13432
return ERR_PTR(-ENOMEM);
13433
13434
cpu_buf->pb = pb;
13435
cpu_buf->cpu = cpu;
13436
cpu_buf->map_key = map_key;
13437
13438
cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
13439
-1, PERF_FLAG_FD_CLOEXEC);
13440
if (cpu_buf->fd < 0) {
13441
err = -errno;
13442
pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
13443
cpu, errstr(err));
13444
goto error;
13445
}
13446
13447
cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
13448
PROT_READ | PROT_WRITE, MAP_SHARED,
13449
cpu_buf->fd, 0);
13450
if (cpu_buf->base == MAP_FAILED) {
13451
cpu_buf->base = NULL;
13452
err = -errno;
13453
pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
13454
cpu, errstr(err));
13455
goto error;
13456
}
13457
13458
if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
13459
err = -errno;
13460
pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
13461
cpu, errstr(err));
13462
goto error;
13463
}
13464
13465
return cpu_buf;
13466
13467
error:
13468
perf_buffer__free_cpu_buf(pb, cpu_buf);
13469
return (struct perf_cpu_buf *)ERR_PTR(err);
13470
}
13471
13472
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
13473
struct perf_buffer_params *p);
13474
13475
struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
13476
perf_buffer_sample_fn sample_cb,
13477
perf_buffer_lost_fn lost_cb,
13478
void *ctx,
13479
const struct perf_buffer_opts *opts)
13480
{
13481
const size_t attr_sz = sizeof(struct perf_event_attr);
13482
struct perf_buffer_params p = {};
13483
struct perf_event_attr attr;
13484
__u32 sample_period;
13485
13486
if (!OPTS_VALID(opts, perf_buffer_opts))
13487
return libbpf_err_ptr(-EINVAL);
13488
13489
sample_period = OPTS_GET(opts, sample_period, 1);
13490
if (!sample_period)
13491
sample_period = 1;
13492
13493
memset(&attr, 0, attr_sz);
13494
attr.size = attr_sz;
13495
attr.config = PERF_COUNT_SW_BPF_OUTPUT;
13496
attr.type = PERF_TYPE_SOFTWARE;
13497
attr.sample_type = PERF_SAMPLE_RAW;
13498
attr.wakeup_events = sample_period;
13499
13500
p.attr = &attr;
13501
p.sample_cb = sample_cb;
13502
p.lost_cb = lost_cb;
13503
p.ctx = ctx;
13504
13505
return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
13506
}
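/*
 * Illustrative sketch of creating a perf buffer consumer over a
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY map; callback and map names are placeholders,
 * and 8 is an arbitrary per-CPU ring size in pages (must be a power of two):
 *
 *	static void handle_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		... consume one raw sample of 'size' bytes ...
 *	}
 *
 *	struct perf_buffer *pb;
 *
 *	pb = perf_buffer__new(bpf_map__fd(map), 8, handle_sample, NULL, NULL, NULL);
 *	if (!pb)
 *		return -errno;
 */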
13507
13508
struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
13509
struct perf_event_attr *attr,
13510
perf_buffer_event_fn event_cb, void *ctx,
13511
const struct perf_buffer_raw_opts *opts)
13512
{
13513
struct perf_buffer_params p = {};
13514
13515
if (!attr)
13516
return libbpf_err_ptr(-EINVAL);
13517
13518
if (!OPTS_VALID(opts, perf_buffer_raw_opts))
13519
return libbpf_err_ptr(-EINVAL);
13520
13521
p.attr = attr;
13522
p.event_cb = event_cb;
13523
p.ctx = ctx;
13524
p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
13525
p.cpus = OPTS_GET(opts, cpus, NULL);
13526
p.map_keys = OPTS_GET(opts, map_keys, NULL);
13527
13528
return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
13529
}
13530
13531
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
13532
struct perf_buffer_params *p)
13533
{
13534
const char *online_cpus_file = "/sys/devices/system/cpu/online";
13535
struct bpf_map_info map;
13536
struct perf_buffer *pb;
13537
bool *online = NULL;
13538
__u32 map_info_len;
13539
int err, i, j, n;
13540
13541
if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
13542
pr_warn("page count should be power of two, but is %zu\n",
13543
page_cnt);
13544
return ERR_PTR(-EINVAL);
13545
}
13546
13547
/* best-effort sanity checks */
13548
memset(&map, 0, sizeof(map));
13549
map_info_len = sizeof(map);
13550
err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
13551
if (err) {
13552
err = -errno;
13553
/* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
13554
* -EBADFD, -EFAULT, or -E2BIG on real error
13555
*/
13556
if (err != -EINVAL) {
13557
pr_warn("failed to get map info for map FD %d: %s\n",
13558
map_fd, errstr(err));
13559
return ERR_PTR(err);
13560
}
13561
pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
13562
map_fd);
13563
} else {
13564
if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
13565
pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
13566
map.name);
13567
return ERR_PTR(-EINVAL);
13568
}
13569
}
13570
13571
pb = calloc(1, sizeof(*pb));
13572
if (!pb)
13573
return ERR_PTR(-ENOMEM);
13574
13575
pb->event_cb = p->event_cb;
13576
pb->sample_cb = p->sample_cb;
13577
pb->lost_cb = p->lost_cb;
13578
pb->ctx = p->ctx;
13579
13580
pb->page_size = getpagesize();
13581
pb->mmap_size = pb->page_size * page_cnt;
13582
pb->map_fd = map_fd;
13583
13584
pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
13585
if (pb->epoll_fd < 0) {
13586
err = -errno;
13587
pr_warn("failed to create epoll instance: %s\n",
13588
errstr(err));
13589
goto error;
13590
}
13591
13592
if (p->cpu_cnt > 0) {
13593
pb->cpu_cnt = p->cpu_cnt;
13594
} else {
13595
pb->cpu_cnt = libbpf_num_possible_cpus();
13596
if (pb->cpu_cnt < 0) {
13597
err = pb->cpu_cnt;
13598
goto error;
13599
}
13600
if (map.max_entries && map.max_entries < pb->cpu_cnt)
13601
pb->cpu_cnt = map.max_entries;
13602
}
13603
13604
pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
13605
if (!pb->events) {
13606
err = -ENOMEM;
13607
pr_warn("failed to allocate events: out of memory\n");
13608
goto error;
13609
}
13610
pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
13611
if (!pb->cpu_bufs) {
13612
err = -ENOMEM;
13613
pr_warn("failed to allocate buffers: out of memory\n");
13614
goto error;
13615
}
13616
13617
err = parse_cpu_mask_file(online_cpus_file, &online, &n);
13618
if (err) {
13619
pr_warn("failed to get online CPU mask: %s\n", errstr(err));
13620
goto error;
13621
}
13622
13623
for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
13624
struct perf_cpu_buf *cpu_buf;
13625
int cpu, map_key;
13626
13627
cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
13628
map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
13629
13630
/* in case user didn't explicitly request particular CPUs to
13631
* be attached to, skip offline/not present CPUs
13632
*/
13633
if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
13634
continue;
13635
13636
cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
13637
if (IS_ERR(cpu_buf)) {
13638
err = PTR_ERR(cpu_buf);
13639
goto error;
13640
}
13641
13642
pb->cpu_bufs[j] = cpu_buf;
13643
13644
err = bpf_map_update_elem(pb->map_fd, &map_key,
13645
&cpu_buf->fd, 0);
13646
if (err) {
13647
err = -errno;
13648
pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
13649
cpu, map_key, cpu_buf->fd,
13650
errstr(err));
13651
goto error;
13652
}
13653
13654
pb->events[j].events = EPOLLIN;
13655
pb->events[j].data.ptr = cpu_buf;
13656
if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
13657
&pb->events[j]) < 0) {
13658
err = -errno;
13659
pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
13660
cpu, cpu_buf->fd,
13661
errstr(err));
13662
goto error;
13663
}
13664
j++;
13665
}
13666
pb->cpu_cnt = j;
13667
free(online);
13668
13669
return pb;
13670
13671
error:
13672
free(online);
13673
if (pb)
13674
perf_buffer__free(pb);
13675
return ERR_PTR(err);
13676
}
13677
13678
struct perf_sample_raw {
13679
struct perf_event_header header;
13680
uint32_t size;
13681
char data[];
13682
};
13683
13684
struct perf_sample_lost {
13685
struct perf_event_header header;
13686
uint64_t id;
13687
uint64_t lost;
13688
uint64_t sample_id;
13689
};
13690
13691
static enum bpf_perf_event_ret
13692
perf_buffer__process_record(struct perf_event_header *e, void *ctx)
13693
{
13694
struct perf_cpu_buf *cpu_buf = ctx;
13695
struct perf_buffer *pb = cpu_buf->pb;
13696
void *data = e;
13697
13698
/* user wants full control over parsing perf event */
13699
if (pb->event_cb)
13700
return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
13701
13702
switch (e->type) {
13703
case PERF_RECORD_SAMPLE: {
13704
struct perf_sample_raw *s = data;
13705
13706
if (pb->sample_cb)
13707
pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
13708
break;
13709
}
13710
case PERF_RECORD_LOST: {
13711
struct perf_sample_lost *s = data;
13712
13713
if (pb->lost_cb)
13714
pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
13715
break;
13716
}
13717
default:
13718
pr_warn("unknown perf sample type %d\n", e->type);
13719
return LIBBPF_PERF_EVENT_ERROR;
13720
}
13721
return LIBBPF_PERF_EVENT_CONT;
13722
}
13723
13724
static int perf_buffer__process_records(struct perf_buffer *pb,
13725
struct perf_cpu_buf *cpu_buf)
13726
{
13727
enum bpf_perf_event_ret ret;
13728
13729
ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
13730
pb->page_size, &cpu_buf->buf,
13731
&cpu_buf->buf_size,
13732
perf_buffer__process_record, cpu_buf);
13733
if (ret != LIBBPF_PERF_EVENT_CONT)
13734
return ret;
13735
return 0;
13736
}
13737
13738
int perf_buffer__epoll_fd(const struct perf_buffer *pb)
13739
{
13740
return pb->epoll_fd;
13741
}
13742
13743
int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
13744
{
13745
int i, cnt, err;
13746
13747
cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
13748
if (cnt < 0)
13749
return -errno;
13750
13751
for (i = 0; i < cnt; i++) {
13752
struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
13753
13754
err = perf_buffer__process_records(pb, cpu_buf);
13755
if (err) {
13756
pr_warn("error while processing records: %s\n", errstr(err));
13757
return libbpf_err(err);
13758
}
13759
}
13760
return cnt;
13761
}
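/*
 * Illustrative polling loop; the 100ms timeout and the exiting flag are
 * placeholders:
 *
 *	while (!exiting) {
 *		err = perf_buffer__poll(pb, 100);
 *		if (err == -EINTR)
 *			continue;
 *		if (err < 0)
 *			break;
 *	}
 *	perf_buffer__free(pb);
 */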
13762
13763
/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
13764
* manager.
13765
*/
13766
size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
13767
{
13768
return pb->cpu_cnt;
13769
}
13770
13771
/*
13772
* Return perf_event FD of a ring buffer in *buf_idx* slot of
13773
* PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
13774
* select()/poll()/epoll() Linux syscalls.
13775
*/
13776
int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
13777
{
13778
struct perf_cpu_buf *cpu_buf;
13779
13780
if (buf_idx >= pb->cpu_cnt)
13781
return libbpf_err(-EINVAL);
13782
13783
cpu_buf = pb->cpu_bufs[buf_idx];
13784
if (!cpu_buf)
13785
return libbpf_err(-ENOENT);
13786
13787
return cpu_buf->fd;
13788
}
13789
13790
int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
13791
{
13792
struct perf_cpu_buf *cpu_buf;
13793
13794
if (buf_idx >= pb->cpu_cnt)
13795
return libbpf_err(-EINVAL);
13796
13797
cpu_buf = pb->cpu_bufs[buf_idx];
13798
if (!cpu_buf)
13799
return libbpf_err(-ENOENT);
13800
13801
*buf = cpu_buf->base;
13802
*buf_size = pb->mmap_size;
13803
return 0;
13804
}
13805
13806
/*
13807
* Consume data from perf ring buffer corresponding to slot *buf_idx* in
13808
* PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
13809
* consume, do nothing and return success.
13810
* Returns:
13811
* - 0 on success;
13812
* - <0 on failure.
13813
*/
13814
int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
13815
{
13816
struct perf_cpu_buf *cpu_buf;
13817
13818
if (buf_idx >= pb->cpu_cnt)
13819
return libbpf_err(-EINVAL);
13820
13821
cpu_buf = pb->cpu_bufs[buf_idx];
13822
if (!cpu_buf)
13823
return libbpf_err(-ENOENT);
13824
13825
return perf_buffer__process_records(pb, cpu_buf);
13826
}
13827
13828
int perf_buffer__consume(struct perf_buffer *pb)
13829
{
13830
int i, err;
13831
13832
for (i = 0; i < pb->cpu_cnt; i++) {
13833
struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
13834
13835
if (!cpu_buf)
13836
continue;
13837
13838
err = perf_buffer__process_records(pb, cpu_buf);
13839
if (err) {
13840
pr_warn("perf_buffer: failed to process records in buffer #%d: %s\n",
13841
i, errstr(err));
13842
return libbpf_err(err);
13843
}
13844
}
13845
return 0;
13846
}
13847
13848
int bpf_program__set_attach_target(struct bpf_program *prog,
13849
int attach_prog_fd,
13850
const char *attach_func_name)
13851
{
13852
int btf_obj_fd = 0, btf_id = 0, err;
13853
13854
if (!prog || attach_prog_fd < 0)
13855
return libbpf_err(-EINVAL);
13856
13857
if (prog->obj->state >= OBJ_LOADED)
13858
return libbpf_err(-EINVAL);
13859
13860
if (attach_prog_fd && !attach_func_name) {
13861
/* remember attach_prog_fd and let bpf_program__load() find
13862
* BTF ID during the program load
13863
*/
13864
prog->attach_prog_fd = attach_prog_fd;
13865
return 0;
13866
}
13867
13868
if (attach_prog_fd) {
13869
btf_id = libbpf_find_prog_btf_id(attach_func_name,
13870
attach_prog_fd, prog->obj->token_fd);
13871
if (btf_id < 0)
13872
return libbpf_err(btf_id);
13873
} else {
13874
if (!attach_func_name)
13875
return libbpf_err(-EINVAL);
13876
13877
/* load btf_vmlinux, if not yet */
13878
err = bpf_object__load_vmlinux_btf(prog->obj, true);
13879
if (err)
13880
return libbpf_err(err);
13881
err = find_kernel_btf_id(prog->obj, attach_func_name,
13882
prog->expected_attach_type,
13883
&btf_obj_fd, &btf_id);
13884
if (err)
13885
return libbpf_err(err);
13886
}
13887
13888
prog->attach_btf_id = btf_id;
13889
prog->attach_btf_obj_fd = btf_obj_fd;
13890
prog->attach_prog_fd = attach_prog_fd;
13891
return 0;
13892
}
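/*
 * Illustrative sketch; object path, program and kernel function names are
 * placeholders. The attach target must be set after open but before load:
 *
 *	obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	prog = bpf_object__find_program_by_name(obj, "handle_connect");
 *	err = bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
 *	if (err)
 *		return err;
 *	err = bpf_object__load(obj);
 */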
13893
13894
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
13895
{
13896
int err = 0, n, len, start, end = -1;
13897
bool *tmp;
13898
13899
*mask = NULL;
13900
*mask_sz = 0;
13901
13902
/* Each substring separated by ',' has the format \d+-\d+ or \d+ */
13903
while (*s) {
13904
if (*s == ',' || *s == '\n') {
13905
s++;
13906
continue;
13907
}
13908
n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
13909
if (n <= 0 || n > 2) {
13910
pr_warn("Failed to get CPU range %s: %d\n", s, n);
13911
err = -EINVAL;
13912
goto cleanup;
13913
} else if (n == 1) {
13914
end = start;
13915
}
13916
if (start < 0 || start > end) {
13917
pr_warn("Invalid CPU range [%d,%d] in %s\n",
13918
start, end, s);
13919
err = -EINVAL;
13920
goto cleanup;
13921
}
13922
tmp = realloc(*mask, end + 1);
13923
if (!tmp) {
13924
err = -ENOMEM;
13925
goto cleanup;
13926
}
13927
*mask = tmp;
13928
memset(tmp + *mask_sz, 0, start - *mask_sz);
13929
memset(tmp + start, 1, end - start + 1);
13930
*mask_sz = end + 1;
13931
s += len;
13932
}
13933
if (!*mask_sz) {
13934
pr_warn("Empty CPU range\n");
13935
return -EINVAL;
13936
}
13937
return 0;
13938
cleanup:
13939
free(*mask);
13940
*mask = NULL;
13941
return err;
13942
}
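/*
 * Worked example of the format above: parsing "0-2,7\n" produces a mask with
 * *mask_sz == 8, entries 0, 1, 2 and 7 set to true and the rest false. The
 * caller owns the mask and must free() it.
 */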
13943
13944
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
13945
{
13946
int fd, err = 0, len;
13947
char buf[128];
13948
13949
fd = open(fcpu, O_RDONLY | O_CLOEXEC);
13950
if (fd < 0) {
13951
err = -errno;
13952
pr_warn("Failed to open cpu mask file %s: %s\n", fcpu, errstr(err));
13953
return err;
13954
}
13955
len = read(fd, buf, sizeof(buf));
13956
close(fd);
13957
if (len <= 0) {
13958
err = len ? -errno : -EINVAL;
13959
pr_warn("Failed to read cpu mask from %s: %s\n", fcpu, errstr(err));
13960
return err;
13961
}
13962
if (len >= sizeof(buf)) {
13963
pr_warn("CPU mask is too big in file %s\n", fcpu);
13964
return -E2BIG;
13965
}
13966
buf[len] = '\0';
13967
13968
return parse_cpu_mask_str(buf, mask, mask_sz);
13969
}
13970
13971
int libbpf_num_possible_cpus(void)
13972
{
13973
static const char *fcpu = "/sys/devices/system/cpu/possible";
13974
static int cpus;
13975
int err, n, i, tmp_cpus;
13976
bool *mask;
13977
13978
tmp_cpus = READ_ONCE(cpus);
13979
if (tmp_cpus > 0)
13980
return tmp_cpus;
13981
13982
err = parse_cpu_mask_file(fcpu, &mask, &n);
13983
if (err)
13984
return libbpf_err(err);
13985
13986
tmp_cpus = 0;
13987
for (i = 0; i < n; i++) {
13988
if (mask[i])
13989
tmp_cpus++;
13990
}
13991
free(mask);
13992
13993
WRITE_ONCE(cpus, tmp_cpus);
13994
return tmp_cpus;
13995
}
13996
13997
static int populate_skeleton_maps(const struct bpf_object *obj,
13998
struct bpf_map_skeleton *maps,
13999
size_t map_cnt, size_t map_skel_sz)
14000
{
14001
int i;
14002
14003
for (i = 0; i < map_cnt; i++) {
14004
struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz;
14005
struct bpf_map **map = map_skel->map;
14006
const char *name = map_skel->name;
14007
void **mmaped = map_skel->mmaped;
14008
14009
*map = bpf_object__find_map_by_name(obj, name);
14010
if (!*map) {
14011
pr_warn("failed to find skeleton map '%s'\n", name);
14012
return -ESRCH;
14013
}
14014
14015
/* externs shouldn't be pre-setup from user code */
14016
if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
14017
*mmaped = (*map)->mmaped;
14018
}
14019
return 0;
14020
}
14021
14022
static int populate_skeleton_progs(const struct bpf_object *obj,
14023
struct bpf_prog_skeleton *progs,
14024
size_t prog_cnt, size_t prog_skel_sz)
14025
{
14026
int i;
14027
14028
for (i = 0; i < prog_cnt; i++) {
14029
struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz;
14030
struct bpf_program **prog = prog_skel->prog;
14031
const char *name = prog_skel->name;
14032
14033
*prog = bpf_object__find_program_by_name(obj, name);
14034
if (!*prog) {
14035
pr_warn("failed to find skeleton program '%s'\n", name);
14036
return -ESRCH;
14037
}
14038
}
14039
return 0;
14040
}
14041
14042
int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
14043
const struct bpf_object_open_opts *opts)
14044
{
14045
struct bpf_object *obj;
14046
int err;
14047
14048
obj = bpf_object_open(NULL, s->data, s->data_sz, s->name, opts);
14049
if (IS_ERR(obj)) {
14050
err = PTR_ERR(obj);
14051
pr_warn("failed to initialize skeleton BPF object '%s': %s\n",
14052
s->name, errstr(err));
14053
return libbpf_err(err);
14054
}
14055
14056
*s->obj = obj;
14057
err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz);
14058
if (err) {
14059
pr_warn("failed to populate skeleton maps for '%s': %s\n", s->name, errstr(err));
14060
return libbpf_err(err);
14061
}
14062
14063
err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz);
14064
if (err) {
14065
pr_warn("failed to populate skeleton progs for '%s': %s\n", s->name, errstr(err));
14066
return libbpf_err(err);
14067
}
14068
14069
return 0;
14070
}
14071
14072
int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
14073
{
14074
int err, len, var_idx, i;
14075
const char *var_name;
14076
const struct bpf_map *map;
14077
struct btf *btf;
14078
__u32 map_type_id;
14079
const struct btf_type *map_type, *var_type;
14080
const struct bpf_var_skeleton *var_skel;
14081
struct btf_var_secinfo *var;
14082
14083
if (!s->obj)
14084
return libbpf_err(-EINVAL);
14085
14086
btf = bpf_object__btf(s->obj);
14087
if (!btf) {
14088
pr_warn("subskeletons require BTF at runtime (object %s)\n",
14089
bpf_object__name(s->obj));
14090
return libbpf_err(-errno);
14091
}
14092
14093
err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz);
14094
if (err) {
14095
pr_warn("failed to populate subskeleton maps: %s\n", errstr(err));
14096
return libbpf_err(err);
14097
}
14098
14099
err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz);
14100
if (err) {
14101
pr_warn("failed to populate subskeleton maps: %s\n", errstr(err));
14102
return libbpf_err(err);
14103
}
14104
14105
for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
14106
var_skel = (void *)s->vars + var_idx * s->var_skel_sz;
14107
map = *var_skel->map;
14108
map_type_id = bpf_map__btf_value_type_id(map);
14109
map_type = btf__type_by_id(btf, map_type_id);
14110
14111
if (!btf_is_datasec(map_type)) {
14112
pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
14113
bpf_map__name(map),
14114
__btf_kind_str(btf_kind(map_type)));
14115
return libbpf_err(-EINVAL);
14116
}
14117
14118
len = btf_vlen(map_type);
14119
var = btf_var_secinfos(map_type);
14120
for (i = 0; i < len; i++, var++) {
14121
var_type = btf__type_by_id(btf, var->type);
14122
var_name = btf__name_by_offset(btf, var_type->name_off);
14123
if (strcmp(var_name, var_skel->name) == 0) {
14124
*var_skel->addr = map->mmaped + var->offset;
14125
break;
14126
}
14127
}
14128
}
14129
return 0;
14130
}
14131
14132
void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
14133
{
14134
if (!s)
14135
return;
14136
free(s->maps);
14137
free(s->progs);
14138
free(s->vars);
14139
free(s);
14140
}
14141
14142
int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
14143
{
14144
int i, err;
14145
14146
err = bpf_object__load(*s->obj);
14147
if (err) {
14148
pr_warn("failed to load BPF skeleton '%s': %s\n", s->name, errstr(err));
14149
return libbpf_err(err);
14150
}
14151
14152
for (i = 0; i < s->map_cnt; i++) {
14153
struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
14154
struct bpf_map *map = *map_skel->map;
14155
14156
if (!map_skel->mmaped)
14157
continue;
14158
14159
*map_skel->mmaped = map->mmaped;
14160
}
14161
14162
return 0;
14163
}
14164
14165
int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
14166
{
14167
int i, err;
14168
14169
for (i = 0; i < s->prog_cnt; i++) {
14170
struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz;
14171
struct bpf_program *prog = *prog_skel->prog;
14172
struct bpf_link **link = prog_skel->link;
14173
14174
if (!prog->autoload || !prog->autoattach)
14175
continue;
14176
14177
/* auto-attaching not supported for this program */
14178
if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
14179
continue;
14180
14181
/* if user already set the link manually, don't attempt auto-attach */
14182
if (*link)
14183
continue;
14184
14185
err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
14186
if (err) {
14187
pr_warn("prog '%s': failed to auto-attach: %s\n",
14188
bpf_program__name(prog), errstr(err));
14189
return libbpf_err(err);
14190
}
14191
14192
/* It's possible that for some SEC() definitions auto-attach
14193
* is supported in some cases (e.g., if definition completely
14194
* specifies target information), but is not in other cases.
14195
* SEC("uprobe") is one such case. If user specified target
14196
* binary and function name, such BPF program can be
14197
* auto-attached. But if not, it shouldn't trigger skeleton's
14198
* attach to fail. It should just be skipped.
14199
* attach_fn signals such case with returning 0 (no error) and
14200
* setting link to NULL.
14201
*/
14202
}
14203
14204
14205
for (i = 0; i < s->map_cnt; i++) {
14206
struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
14207
struct bpf_map *map = *map_skel->map;
14208
struct bpf_link **link;
14209
14210
if (!map->autocreate || !map->autoattach)
14211
continue;
14212
14213
/* only struct_ops maps can be attached */
14214
if (!bpf_map__is_struct_ops(map))
14215
continue;
14216
14217
/* skeleton was created with an earlier version of bpftool, notify user */
14218
if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) {
14219
pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n",
14220
bpf_map__name(map));
14221
continue;
14222
}
14223
14224
link = map_skel->link;
14225
if (!link) {
14226
pr_warn("map '%s': BPF map skeleton link is uninitialized\n",
14227
bpf_map__name(map));
14228
continue;
14229
}
14230
14231
if (*link)
14232
continue;
14233
14234
*link = bpf_map__attach_struct_ops(map);
14235
if (!*link) {
14236
err = -errno;
14237
pr_warn("map '%s': failed to auto-attach: %s\n",
14238
bpf_map__name(map), errstr(err));
14239
return libbpf_err(err);
14240
}
14241
}
14242
14243
return 0;
14244
}
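/*
 * Illustrative sketch of the typical skeleton lifecycle; the my_obj_bpf__*()
 * helpers are hypothetical names generated by "bpftool gen skeleton" from the
 * object name, and they wrap the bpf_object__*_skeleton() APIs above:
 *
 *	struct my_obj_bpf *skel;
 *
 *	skel = my_obj_bpf__open();
 *	if (!skel)
 *		return -errno;
 *	err = my_obj_bpf__load(skel);
 *	if (!err)
 *		err = my_obj_bpf__attach(skel);
 *	...
 *	my_obj_bpf__destroy(skel);
 */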
14245
14246
void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
14247
{
14248
int i;
14249
14250
for (i = 0; i < s->prog_cnt; i++) {
14251
struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz;
14252
struct bpf_link **link = prog_skel->link;
14253
14254
bpf_link__destroy(*link);
14255
*link = NULL;
14256
}
14257
14258
if (s->map_skel_sz < sizeof(struct bpf_map_skeleton))
14259
return;
14260
14261
for (i = 0; i < s->map_cnt; i++) {
14262
struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
14263
struct bpf_link **link = map_skel->link;
14264
14265
if (link) {
14266
bpf_link__destroy(*link);
14267
*link = NULL;
14268
}
14269
}
14270
}
14271
14272
void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
14273
{
14274
if (!s)
14275
return;
14276
14277
bpf_object__detach_skeleton(s);
14278
if (s->obj)
14279
bpf_object__close(*s->obj);
14280
free(s->maps);
14281
free(s->progs);
14282
free(s);
14283
}
14284
14285