libbpf-bootstrap Development Guide: Using ringbuf for communication - bootstrap

Table of contents

code

        Comm data structure part (for bpf&user data share)

BPF program section

Function Description

Header file introduction instructions

bpf_probe_read_str reads filename

bpf_ringbuf_submit submits information to the BPF ring buffer

bpf_map_delete_elem

User program part

ring_buffer__new

ring_buffer__poll

ring_buffer__free

Execution effect


code

comm data structure part (for bpf&user data share)
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (c) 2020 Facebook */
#ifndef __BOOTSTRAP_H
#define __BOOTSTRAP_H

/* Size in bytes of the comm buffer carried in every event. */
#define TASK_COMM_LEN	 16
/* Size in bytes of the filename buffer carried in every event. */
#define MAX_FILENAME_LEN 127

/*
 * Record passed from the BPF programs to user space through the ring buffer.
 * One layout is shared by both event kinds; exit_event tells them apart.
 */
struct event {
	/* ID of the process the event is about. */
	int pid;
	/* tgid of the task's real_parent, as read by the BPF program. */
	int ppid;
	/* Written only by the exit handler. */
	unsigned exit_code;
	/* Written only by the exit handler; 0 when no exec timestamp was recorded. */
	unsigned long long duration_ns;
	/* Filled by bpf_get_current_comm(). */
	char comm[TASK_COMM_LEN];
	/* Written only by the exec handler. */
	char filename[MAX_FILENAME_LEN];
	/* true for an exit event, false for an exec event. */
	bool exit_event;
};

#endif /* __BOOTSTRAP_H */
BPF program section
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2020 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include "bootstrap.h"

/* License string placed in the "license" section of the BPF object. */
char LICENSE[] SEC("license") = "Dual BSD/GPL";

/*
 * Hash map from process ID to the timestamp (bpf_ktime_get_ns) taken when the
 * exec tracepoint fired. handle_exec() writes entries; handle_exit() looks
 * them up to compute the lifetime and then deletes them.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 8192);
	__type(key, pid_t);
	__type(value, u64);
} exec_start SEC(".maps");

/*
 * Ring buffer through which both handlers send struct event records to the
 * user-space consumer.
 */
struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 256 * 1024);
} rb SEC(".maps");

/*
 * Minimum process lifetime to report, in nanoseconds. The user-space loader
 * in this guide sets it through skel->rodata before loading the program.
 * When non-zero, exec events are suppressed and short-lived exits are dropped.
 */
const volatile unsigned long long min_duration_ns = 0;

/*
 * Handler for the sched/sched_process_exec tracepoint.
 *
 * Records the exec timestamp of the current process in exec_start and, unless
 * a minimum duration filter is active, emits an EXEC event (pid, ppid, comm,
 * filename) to the ring buffer.
 *
 * Always returns 0. If the ring buffer has no room, the event is dropped.
 */
SEC("tp/sched/sched_process_exec")
int handle_exec(struct trace_event_raw_sched_process_exec *ctx)
{
	struct task_struct *task;
	unsigned fname_off;
	struct event *e;
	pid_t pid;
	u64 ts;

	/* remember time exec() was executed for this PID */
	/* the upper 32 bits of the helper's return value are used as the process ID */
	pid = bpf_get_current_pid_tgid() >> 32;
	ts = bpf_ktime_get_ns();
	/* BPF_ANY: create the entry or overwrite an existing one; result is not checked */
	bpf_map_update_elem(&exec_start, &pid, &ts, BPF_ANY);

	/* don't emit exec events when minimum duration is specified */
	if (min_duration_ns)
		return 0;

	/* reserve sample from BPF ringbuf */
	e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
	if (!e)
		return 0;

	/* fill out the sample with data */
	/* note: exit_code and duration_ns are not written for exec events */
	task = (struct task_struct *)bpf_get_current_task();

	e->exit_event = false;
	e->pid = pid;
	e->ppid = BPF_CORE_READ(task, real_parent, tgid);
	bpf_get_current_comm(&e->comm, sizeof(e->comm));

	/*
	 * The low 16 bits of __data_loc_filename hold the offset of the filename
	 * string relative to the start of the tracepoint record (ctx).
	 */
	fname_off = ctx->__data_loc_filename & 0xFFFF;
	bpf_probe_read_str(&e->filename, sizeof(e->filename), (void *)ctx + fname_off);

	/* successfully submit it to user-space for post-processing */
	bpf_ringbuf_submit(e, 0);
	return 0;
}

/*
 * Handler for the sched/sched_process_exit tracepoint.
 *
 * Ignores exits where the two halves of the pid/tgid value differ (thread
 * exits). It computes the lifetime from the timestamp stored by handle_exec(),
 * removes that map entry, applies the min_duration_ns filter, and emits an
 * EXIT event (pid, ppid, exit code, duration, comm) to the ring buffer.
 *
 * Always returns 0. If the ring buffer has no room, the event is dropped.
 * The ctx argument and the local ts are not used in this function.
 */
SEC("tp/sched/sched_process_exit")
int handle_exit(struct trace_event_raw_sched_process_template *ctx)
{
	struct task_struct *task;
	struct event *e;
	pid_t pid, tid;
	u64 id, ts, *start_ts, duration_ns = 0;

	/* get PID and TID of exiting thread/process */
	id = bpf_get_current_pid_tgid();
	pid = id >> 32;
	tid = (u32)id;

	/* ignore thread exits */
	if (pid != tid)
		return 0;

	/* if we recorded start of the process, calculate lifetime duration */
	start_ts = bpf_map_lookup_elem(&exec_start, &pid);
	if (start_ts)
		duration_ns = bpf_ktime_get_ns() - *start_ts;
	else if (min_duration_ns)
		/* no start time recorded and a filter is active: nothing to compare, skip */
		return 0;
	/* the entry is no longer needed once the process has exited */
	bpf_map_delete_elem(&exec_start, &pid);

	/* if process didn't live long enough, return early */
	if (min_duration_ns && duration_ns < min_duration_ns)
		return 0;

	/* reserve sample from BPF ringbuf */
	e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
	if (!e)
		return 0;

	/* fill out the sample with data */
	/* note: filename is not written for exit events */
	task = (struct task_struct *)bpf_get_current_task();

	e->exit_event = true;
	e->duration_ns = duration_ns;
	e->pid = pid;
	e->ppid = BPF_CORE_READ(task, real_parent, tgid);
	/* keep bits 8..15 of task->exit_code (presumably the exit status byte; confirm) */
	e->exit_code = (BPF_CORE_READ(task, exit_code) >> 8) & 0xff;
	bpf_get_current_comm(&e->comm, sizeof(e->comm));

	/* send data to user-space for post-processing */
	bpf_ringbuf_submit(e, 0);
	return 0;
}
Function Description

The main goal of this BPF program is to track process execution and exit events in Linux. It uses the tracepoint mechanism to track sched/sched_process_exec and sched/sched_process_exit events, and then records relevant information.

Header file introduction instructions

#include <bpf/bpf_core_read.h>: This header file provides the BPF_CORE_READ macro for BPF programs, which can be used to read field values of kernel data structures. This is part of the BPF CO-RE (Compile Once – Run Everywhere) technology, which enables a BPF program to run on different kernel versions even if the layouts of their data structures differ.

In the code given, BPF_CORE_READ is used to read the following fields:

  1. BPF_CORE_READ(task, real_parent, tgid): Read the process ID (PID) of the parent process of the current process. real_parent is a field in the task_struct structure, indicating the parent process of the process. tgid is a field in the task_struct structure, indicating the thread group ID. For a single-threaded process, it is the PID.
  2. BPF_CORE_READ(task, exit_code): Read the exit status code of the process. exit_code is a field in the task_struct structure that represents the exit status code of the process.

bpf_probe_read_str reads filename
fname_off = ctx->__data_loc_filename & 0xFFFF;
bpf_probe_read_str(&e->filename, sizeof(e->filename), (void *)ctx + fname_off);

Here you need to look at a structure first: trace_event_raw_sched_process_exec

/* Layout of the record passed as ctx to the sched_process_exec handler. */
struct trace_event_raw_sched_process_exec {
	/* common header of a trace event */
	struct trace_entry ent;
	/* low 16 bits: offset of the filename from the start of this record */
	u32 __data_loc_filename;
	pid_t pid;
	pid_t old_pid;
	/* variable-length data follows the fixed fields; the filename lives here */
	char __data[0];
};

This is actually a technique using flexible arrays

  • struct trace_entry ent;: trace_entry is the common header that precedes every trace event in the Linux tracepoint mechanism. It contains basic information about the event, such as the event type, flags, preemption count and the PID of the task that triggered it.
  • u32 __data_loc_filename;: This field is a 32-bit integer containing the offset (lower 16 bits) and length (higher 16 bits) of the filename in the context. The filename is the name of the file executed by the process.
  • pid_t pid;: This field represents the process ID (PID) of the newly started process.
  • pid_t old_pid;: This field represents the process ID of the process that was originally executing. When a new process starts executing, the original executing process will be replaced.
  • char __data[0];: This is a zero-length array used to represent the data that may follow the structure. This technique is commonly used in C to represent a variable length data. In this structure, __data is used to store the actual data of the file name. The location of the filename can be determined via the __data_loc_filename field.

Therefore, if you want to know the address of the actual file name, you need to use (void *)ctx + fname_off

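The bpf_probe_read_str function is a BPF helper, i.e. a function provided by the kernel that BPF (Berkeley Packet Filter) programs can call. It copies a null-terminated string from an unsafe address (user space or kernel space) into a destination buffer of the given size.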

int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr);
bpf_ringbuf_submit submits information to the BPF ring buffer

The BPF ring buffer is a mechanism for transferring data from the kernel to user space. BPF programs run in the kernel, and when they need to pass data to user space (for example, to a monitoring or diagnostic tool), they write the data to the ring buffer; the user-space program then reads the data from the ring buffer.

The bpf_ringbuf_submit function commits a sample that was previously reserved with bpf_ringbuf_reserve, making it visible to the consumer. After this function is called, the user-space program can read the data in e from the ring buffer. This mechanism allows BPF programs to transfer data to user space efficiently, without complex and time-consuming system calls.

void bpf_ringbuf_submit(void *data, u64 flags);

The parameters are explained as follows:

  • void *data: This is a pointer to the sample you want to submit to the ring buffer. It must be the pointer returned by an earlier bpf_ringbuf_reserve call; in this example it points to the struct event that the BPF program has just filled in.
  • u64 flags: This controls how the consumer is notified. Passing 0, as this example does, selects adaptive notification; BPF_RB_NO_WAKEUP suppresses the notification and BPF_RB_FORCE_WAKEUP sends it unconditionally.

bpf_map_delete_elem

Remove the record from the exec_start map because the process has exited and is no longer needed.

User program part
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2020 Facebook */
#include <argp.h>
#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
#include "bootstrap.h"
#include "bootstrap.skel.h"

/* Command-line settings, filled in by parse_arg(). */
static struct env {
	/* when false, libbpf debug-level messages are suppressed */
	bool verbose;
	/* minimum process duration to report, in milliseconds; 0 when not given */
	long min_duration_ms;
} env;

/*
 * Strings picked up by argp for --version and --help output.
 *
 * The bug address had been replaced by an e-mail-obfuscation placeholder
 * ("[email protected]"); it is restored here to the BPF mailing list address.
 */
const char *argp_program_version = "bootstrap 0.0";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] = "BPF bootstrap demo application.\n"
				"\n"
				"It traces process start and exits and shows associated \n"
				"information (filename, process duration, PID and PPID, etc).\n"
				"\n"
				"USAGE: ./bootstrap [-d <min-duration-ms>] [-v]\n";

/*
 * Option table handed to argp: 'v' takes no argument, 'd' takes a
 * DURATION-MS argument. The empty entry terminates the table.
 */
static const struct argp_option opts[] = {
	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
	{ "duration", 'd', "DURATION-MS", 0, "Minimum process duration (ms) to report" },
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	switch (key) {
	case 'v':
		env.verbose = true;
		break;
	case 'd':
		errno = 0;
		env.min_duration_ms = strtol(arg, NULL, 10);
		if (errno || env.min_duration_ms <= 0) {
			fprintf(stderr, "Invalid duration: %s\n", arg);
			argp_usage(state);
		}
		break;
	case ARGP_KEY_ARG:
		argp_usage(state);
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}
	return 0;
}

/* argp descriptor tying together the option table, the parser callback and the help text. */
static const struct argp argp = {
	.options = opts,
	.parser = parse_arg,
	.doc = argp_program_doc,
};

/*
 * Print callback installed with libbpf_set_print().
 *
 * Everything is written to stderr, except that debug-level messages are
 * dropped unless verbose output was requested. Returns the vfprintf()
 * result, or 0 for a suppressed message.
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
	if (env.verbose || level != LIBBPF_DEBUG)
		return vfprintf(stderr, format, args);

	return 0;
}

/*
 * Set by the signal handler and polled by the main loop to request shutdown.
 * volatile sig_atomic_t is the object type the C standard allows a signal
 * handler to write to.
 */
static volatile sig_atomic_t exiting = 0;

/* Signal handler for SIGINT/SIGTERM: only raises the exiting flag. */
static void sig_handler(int sig)
{
	(void)sig; /* the same action is taken for every signal */
	exiting = 1;
}

/*
 * Ring buffer sample callback: prints one line per event to stdout.
 *
 * ctx     - unused
 * data    - sample payload, interpreted as a struct event
 * data_sz - unused
 *
 * The line starts with the local wall-clock time at which the sample is
 * handled. EXEC lines end with the filename; EXIT lines end with the exit
 * code and, when it is non-zero, the duration in milliseconds.
 * Always returns 0.
 */
static int handle_event(void *ctx, void *data, size_t data_sz)
{
	const struct event *evt = data;
	char stamp[32];
	time_t now;

	time(&now);
	strftime(stamp, sizeof(stamp), "%H:%M:%S", localtime(&now));

	if (!evt->exit_event) {
		printf("%-8s %-5s %-16s %-7d %-7d %s\n", stamp, "EXEC", evt->comm, evt->pid,
		       evt->ppid, evt->filename);
		return 0;
	}

	printf("%-8s %-5s %-16s %-7d %-7d [%u]", stamp, "EXIT", evt->comm, evt->pid,
	       evt->ppid, evt->exit_code);
	if (evt->duration_ns)
		printf(" (%llums)", evt->duration_ns / 1000000);
	printf("\n");

	return 0;
}

/*
 * Entry point: parses options, opens/loads/attaches the BPF skeleton, then
 * polls the ring buffer and prints events until SIGINT/SIGTERM is received
 * or polling fails.
 *
 * Returns 0 on success, the argp_parse() error if option parsing fails,
 * 1 if the skeleton cannot be opened, and otherwise the positive form of
 * the negative error code that stopped the program.
 */
int main(int argc, char **argv)
{
	struct ring_buffer *rb = NULL;
	struct bootstrap_bpf *skel;
	int err;

	/* Parse command line arguments */
	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
	if (err)
		return err;

	/* Set up libbpf errors and debug info callback */
	libbpf_set_print(libbpf_print_fn);

	/* Cleaner handling of Ctrl-C */
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	/* Open BPF application (loading happens separately below) */
	skel = bootstrap_bpf__open();
	if (!skel) {
		fprintf(stderr, "Failed to open BPF skeleton\n");
		return 1;
	}

	/*
	 * Parameterize BPF code with minimum duration parameter.
	 * This is done between open and load, before the program is loaded.
	 */
	skel->rodata->min_duration_ns = env.min_duration_ms * 1000000ULL;

	/* Load & verify BPF programs */
	err = bootstrap_bpf__load(skel);
	if (err) {
		fprintf(stderr, "Failed to load and verify BPF skeleton\n");
		goto cleanup;
	}

	/* Attach tracepoints */
	err = bootstrap_bpf__attach(skel);
	if (err) {
		fprintf(stderr, "Failed to attach BPF skeleton\n");
		goto cleanup;
	}

	/* Set up ring buffer polling; handle_event is called for every sample */
	rb = ring_buffer__new(bpf_map__fd(skel->maps.rb), handle_event, NULL, NULL);
	if (!rb) {
		err = -1;
		fprintf(stderr, "Failed to create ring buffer\n");
		goto cleanup;
	}

	/* Process events */
	printf("%-8s %-5s %-16s %-7s %-7s %s\n", "TIME", "EVENT", "COMM", "PID", "PPID",
	       "FILENAME/EXIT CODE");
	while (!exiting) {
		err = ring_buffer__poll(rb, 100 /* timeout, ms */);
		/* Ctrl-C will cause -EINTR */
		if (err == -EINTR) {
			err = 0;
			break;
		}
		if (err < 0) {
			fprintf(stderr, "Error polling ring buffer: %d\n", err);
			break;
		}
	}

cleanup:
	/* Clean up; rb is still NULL here if ring buffer creation was not reached */
	ring_buffer__free(rb);
	bootstrap_bpf__destroy(skel);

	/* a non-negative err (e.g. the last poll's sample count) maps to success */
	return err < 0 ? -err : 0;
}
ring_buffer__new

rb = ring_buffer__new(bpf_map__fd(skel->maps.rb), handle_event, NULL, NULL);

  1. bpf_map__fd(skel->maps.rb): This function call obtains the file descriptor of the BPF map, which is the ring buffer defined in the BPF program. skel->maps.rb is a reference to this ring buffer map, where skel is an instance of the BPF program structure that is loaded and verified.
  2. handle_event: This is a function pointer. The function pointed to will be called every time data is read from the ring buffer. In this example, the handle_event function handles every event sent from the BPF program.
  3. NULL, NULL: The third argument is a context pointer that is passed through to the callback (the ctx parameter of handle_event), and the fourth is an optional pointer to a ring buffer options structure. Neither is needed in this example, so both are set to NULL.
  4. ring_buffer__new: This function creates a new ring buffer instance. This new instance will be used to read data sent from the BPF program.

If the ring buffer is created successfully, the ring_buffer__new function will return a pointer to the new ring buffer. If creation fails, it returns NULL. In this example, the returned pointer is stored in the rb variable.

ring_buffer__poll

The purpose is to check if there is new data in the ring buffer, which may come from a BPF program. If there is new data, the ring_buffer__poll function will trigger the callback function (handle_event) specified when the ring buffer was created, and pass the new data as a parameter to the function.

Here's a detailed explanation of this line of code:

err = ring_buffer__poll(rb, 100 /* timeout, ms */);

  1. rb: This is a pointer to the ring buffer that we want to poll.
  2. 100: This is the timeout for the polling operation, in milliseconds. If no new data arrives in the ring buffer within 100 milliseconds, ring_buffer__poll returns 0 (not an error), and the loop simply polls again.
  3. ring_buffer__poll: This function blocks the current thread until one of the following conditions is met: there is new data in the ring buffer, a timeout occurs (100 milliseconds in this example), or an interrupt signal is received.
  4. err: This variable stores the return value of the ring_buffer__poll function. If the function successfully reads data from the ring buffer, it returns the number of events read. If a timeout occurs, it returns 0. If an error occurs (e.g. due to an interrupt), it will return a negative error code.
ring_buffer__free

ring_buffer__free is a function used to release the ring buffer previously created through ring_buffer__new.

Execution effect

TIME     EVENT COMM             PID     PPID    FILENAME/EXIT CODE
21:29:15 EXEC  gio-launch-desk  34135   2947    /usr/lib/x86_64-linux-gnu/glib-2.0/gio-launch-desktop
21:29:15 EXEC  google-chrome-s  34135   2947    /usr/bin/google-chrome-stable
21:29:15 EXEC  readlink         34138   34135   /usr/bin/readlink
21:29:15 EXIT  readlink         34138   34135   [0] (0ms)
21:29:15 EXEC  dirname          34139   34135   /usr/bin/dirname
21:29:15 EXIT  dirname          34139   34135   [0] (0ms)
21:29:15 EXEC  mkdir            34140   34135   /usr/bin/mkdir
21:29:15 EXIT  mkdir            34140   34135   [0] (0ms)
21:29:15 EXEC  cat              34141   34135   /usr/bin/cat
21:29:15 EXEC  chrome           34135   2947    /opt/google/chrome/chrome
21:29:15 EXEC  cat              34142   34135   /usr/bin/cat
21:29:15 EXEC  chrome_crashpad  34144   34143   /opt/google/chrome/chrome_crashpad_handler
21:29:15 EXIT  chrome           34143   34135   [0]
21:29:15 EXEC  chrome_crashpad  34146   34145   /opt/google/chrome/chrome_crashpad_handler
21:29:15 EXIT  chrome_crashpad  34145   34144   [0]
21:29:15 EXIT  chrome           34151   34135   [0]
21:29:15 EXEC  chrome           34152   34135   /opt/google/chrome/chrome
21:29:15 EXEC  chrome           34153   34135   /opt/google/chrome/chrome
21:29:15 EXEC  nacl_helper      34154   34153   /opt/google/chrome/nacl_helper

Guess you like

Origin blog.csdn.net/qq_32378713/article/details/131744608