diff --git a/Documentation/input/event-codes.rst b/Documentation/input/event-codes.rst index 4424cbff251f87..77a6c9b3956d56 100644 --- a/Documentation/input/event-codes.rst +++ b/Documentation/input/event-codes.rst @@ -241,6 +241,12 @@ A few EV_ABS codes have special meanings: emitted only when the selected profile changes, indicating the newly selected profile value. +* ABS_SND_PROFILE: + + - Used to describe the state of a multi-value sound profile switch. + An event is emitted only when the selected profile changes, + indicating the newly selected profile value. + * ABS_MT_: - Used to describe multitouch input events. Please see diff --git a/Makefile b/Makefile index e404e4767944ed..3cd00b62cde99c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 93173f0a09c7de..922d58abbbd672 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1587,6 +1587,7 @@ config ARCH_SELECTS_KEXEC_FILE def_bool y depends on KEXEC_FILE select HAVE_IMA_KEXEC if IMA + select KEXEC_BPF if DEBUG_INFO_BTF && BPF_SYSCALL config ARCH_SUPPORTS_KEXEC_SIG def_bool y diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 337d2dc81b4ca9..c5865b0d2aaaf6 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -3513,6 +3513,7 @@ static const char *absolutes[ABS_CNT] = { [ABS_DISTANCE] = "Distance", [ABS_TILT_X] = "XTilt", [ABS_TILT_Y] = "YTilt", [ABS_TOOL_WIDTH] = "ToolWidth", [ABS_VOLUME] = "Volume", [ABS_PROFILE] = "Profile", + [ABS_SND_PROFILE] = "SoundProfile", [ABS_MISC] = "Misc", [ABS_MT_SLOT] = "MTSlot", [ABS_MT_TOUCH_MAJOR] = "MTMajor", diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index d72e89c25e5031..363d509493866a 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -133,6 +133,8 @@ static const struct xpad_device { } xpad_device[] = { /* Please keep this list sorted by vendor and product ID. */ { 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 }, + { 0x0351, 0x1000, "CRKD LP Blueberry Burst Pro Edition (Xbox)", 0, XTYPE_XBOX360 }, + { 0x0351, 0x2000, "CRKD LP Black Tribal Edition (Xbox) ", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff01, "Wooting One (Legacy)", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff02, "Wooting Two (Legacy)", 0, XTYPE_XBOX360 }, { 0x03f0, 0x038D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wired */ @@ -420,6 +422,7 @@ static const struct xpad_device { { 0x3285, 0x0663, "Nacon Evol-X", 0, XTYPE_XBOXONE }, { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 }, { 0x3537, 0x1010, "GameSir G7 SE", 0, XTYPE_XBOXONE }, + { 0x3651, 0x1000, "CRKD SG", 0, XTYPE_XBOX360 }, { 0x366c, 0x0005, "ByoWave Proteus Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE, FLAG_DELAY_INIT }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, { 0x37d7, 0x2501, "Flydigi Apex 5", 0, XTYPE_XBOX360 }, @@ -518,6 +521,7 @@ static const struct usb_device_id xpad_table[] = { */ { USB_INTERFACE_INFO('X', 'B', 0) }, /* Xbox USB-IF not-approved class */ XPAD_XBOX360_VENDOR(0x0079), /* GPD Win 2 controller */ + XPAD_XBOX360_VENDOR(0x0351), /* CRKD Controllers */ XPAD_XBOX360_VENDOR(0x03eb), /* Wooting Keyboards (Legacy) */ XPAD_XBOX360_VENDOR(0x03f0), /* HP HyperX Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x03f0), /* HP HyperX Xbox One controllers */ @@ -578,6 +582,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x3285), /* Nacon Evol-X */ XPAD_XBOX360_VENDOR(0x3537), /* GameSir Controllers */ XPAD_XBOXONE_VENDOR(0x3537), /* GameSir Controllers */ + XPAD_XBOX360_VENDOR(0x3651), /* CRKD Controllers */ XPAD_XBOXONE_VENDOR(0x366c), /* ByoWave controllers */ XPAD_XBOX360_VENDOR(0x37d7), /* Flydigi Controllers */ XPAD_XBOX360_VENDOR(0x413d), /* Black Shark Green Ghost Controller */ diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 6c999d89ee4bd0..422e28ad1e8e2b 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -1937,6 +1937,13 @@ static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = { }, .callback = atkbd_deactivate_fixup, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HONOR"), + DMI_MATCH(DMI_PRODUCT_NAME, "FMB-P"), + }, + .callback = atkbd_deactivate_fixup, + }, { } }; diff --git a/drivers/input/keyboard/lkkbd.c b/drivers/input/keyboard/lkkbd.c index c035216dd27c12..2f130f819363c6 100644 --- a/drivers/input/keyboard/lkkbd.c +++ b/drivers/input/keyboard/lkkbd.c @@ -670,7 +670,8 @@ static int lkkbd_connect(struct serio *serio, struct serio_driver *drv) return 0; - fail3: serio_close(serio); + fail3: disable_work_sync(&lk->tq); + serio_close(serio); fail2: serio_set_drvdata(serio, NULL); fail1: input_free_device(input_dev); kfree(lk); @@ -684,6 +685,8 @@ static void lkkbd_disconnect(struct serio *serio) { struct lkkbd *lk = serio_get_drvdata(serio); + disable_work_sync(&lk->tq); + input_get_device(lk->dev); input_unregister_device(lk->dev); serio_close(serio); diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index d0cb9fb9482185..df8953a5196e1b 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -2975,6 +2975,7 @@ static void alps_disconnect(struct psmouse *psmouse) psmouse_reset(psmouse); timer_shutdown_sync(&priv->timer); + disable_delayed_work_sync(&priv->dev3_register_work); if (priv->dev2) input_unregister_device(priv->dev2); if (!IS_ERR_OR_NULL(priv->dev3)) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 1caa6c4ca435c7..654771275ce878 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1169,6 +1169,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "X5KK45xS_X5SP45xS"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, /* * A lot of modern Clevo barebones have touchpad and/or keyboard issues * after suspend fixable with the forcenorestore quirk. diff --git a/drivers/input/touchscreen/apple_z2.c b/drivers/input/touchscreen/apple_z2.c index 0de161eae59a05..271ababf0ad559 100644 --- a/drivers/input/touchscreen/apple_z2.c +++ b/drivers/input/touchscreen/apple_z2.c @@ -21,6 +21,7 @@ #define APPLE_Z2_TOUCH_STARTED 3 #define APPLE_Z2_TOUCH_MOVED 4 #define APPLE_Z2_CMD_READ_INTERRUPT_DATA 0xEB +#define APPLE_Z2_REPLY_INTERRUPT_DATA 0xE1 #define APPLE_Z2_HBPP_CMD_BLOB 0x3001 #define APPLE_Z2_FW_MAGIC 0x5746325A #define LOAD_COMMAND_INIT_PAYLOAD 0 @@ -142,6 +143,9 @@ static int apple_z2_read_packet(struct apple_z2 *z2) if (error) return error; + if (z2->rx_buf[0] != APPLE_Z2_REPLY_INTERRUPT_DATA) + return 0; + pkt_len = (get_unaligned_le16(z2->rx_buf + 1) + 8) & 0xfffffffc; error = spi_read(z2->spidev, z2->rx_buf, pkt_len); diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c index d6edfab1677040..0534b2ba650bbf 100644 --- a/drivers/input/touchscreen/ti_am335x_tsc.c +++ b/drivers/input/touchscreen/ti_am335x_tsc.c @@ -85,7 +85,7 @@ static int titsc_config_wires(struct titsc *ts_dev) wire_order[i] = ts_dev->config_inp[i] & 0x0F; if (WARN_ON(analog_line[i] > 7)) return -EINVAL; - if (WARN_ON(wire_order[i] > ARRAY_SIZE(config_pins))) + if (WARN_ON(wire_order[i] >= ARRAY_SIZE(config_pins))) return -EINVAL; } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e5be698256d15a..25bc1b6b8a6005 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3843,4 +3843,23 @@ static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 all return 0; } +struct bpf_parser_buf { + char *buf; + int size; +}; + +struct bpf_parser_context; +typedef int (*bpf_parser_handler_t)(struct bpf_parser_context *ctx); + +struct bpf_parser_context { + struct kref ref; + struct hlist_node hash_node; + bpf_parser_handler_t func; + struct bpf_parser_buf *buf; + void *data; +}; + +struct bpf_parser_context *alloc_bpf_parser_context(bpf_parser_handler_t func, + void *data); +void put_bpf_parser_context(struct bpf_parser_context *ctx); #endif /* _LINUX_BPF_H */ diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h index ac862422df158b..39df02bcbc6618 100644 --- a/include/linux/decompress/mm.h +++ b/include/linux/decompress/mm.h @@ -81,6 +81,7 @@ MALLOC_VISIBLE void free(void *where) #include #include #include +#include /* Use defines rather than static inline in order to avoid spurious * warnings when not needed (indeed large_malloc / large_free are not @@ -92,7 +93,14 @@ MALLOC_VISIBLE void free(void *where) #define large_malloc(a) vmalloc(a) #define large_free(a) vfree(a) +#ifdef CONFIG_KEEP_DECOMPRESSOR +#define INIT +#define INITCONST +#else #define INIT __init +#define INITCONST __initconst +#endif + #define STATIC #include diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 30f3c9eaafaad9..4bdb6a1659873d 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -891,6 +891,7 @@ #define ABS_VOLUME 0x20 #define ABS_PROFILE 0x21 +#define ABS_SND_PROFILE 0x22 #define ABS_MISC 0x28 @@ -1000,4 +1001,12 @@ #define SND_MAX 0x07 #define SND_CNT (SND_MAX+1) +/* + * ABS_SND_PROFILE values + */ + +#define SND_PROFILE_SILENT 0x00 +#define SND_PROFILE_VIBRATE 0x01 +#define SND_PROFILE_RING 0x02 + #endif diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 15632358bcf711..dbfdf34a78aa0e 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -46,6 +46,14 @@ config KEXEC_FILE for kernel and initramfs as opposed to list of segments as accepted by kexec system call. +config KEXEC_BPF + bool "Enable bpf-prog to parse the kexec image" + depends on KEXEC_FILE + depends on DEBUG_INFO_BTF && BPF_SYSCALL && KEEP_DECOMPRESSOR + help + This is a feature to run bpf section inside a kexec image file, which + parses the image properly and help kernel set up kexec boot protocol + config KEXEC_SIG bool "Verify kernel signature during kexec_file_load() syscall" depends on ARCH_SUPPORTS_KEXEC_SIG diff --git a/kernel/Makefile b/kernel/Makefile index e83669841b8cc6..a1c5f5fb02770f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_CRASH_DUMP_KUNIT_TEST) += crash_core_test.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o +obj-$(CONFIG_KEXEC_BPF) += kexec_bpf_loader.o kexec_uefi_app.o obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup/ @@ -142,6 +143,7 @@ obj-$(CONFIG_SYSCTL_KUNIT_TEST) += sysctl-test.o CFLAGS_kstack_erase.o += $(DISABLE_KSTACK_ERASE) CFLAGS_kstack_erase.o += $(call cc-option,-mgeneral-regs-only) +CFLAGS_kexec_bpf_loader.o += -I$(srctree)/tools/lib obj-$(CONFIG_KSTACK_ERASE) += kstack_erase.o KASAN_SANITIZE_kstack_erase.o := n KCSAN_SANITIZE_kstack_erase.o := n diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 232cbc97434db9..309b905a81736b 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -56,6 +56,9 @@ obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o ifeq ($(CONFIG_DMA_SHARED_BUFFER),y) obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o endif +ifeq ($(CONFIG_KEXEC_BPF),y) +obj-$(CONFIG_BPF_SYSCALL) += bpf_buffer_parser.o +endif CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) diff --git a/kernel/bpf/bpf_buffer_parser.c b/kernel/bpf/bpf_buffer_parser.c new file mode 100644 index 00000000000000..1d2f1cfb5788cd --- /dev/null +++ b/kernel/bpf/bpf_buffer_parser.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +#define BPF_CONTEXT_HASH_BITS 10 + +static DEFINE_SPINLOCK(bpf_parser_context_lock); +static DEFINE_HASHTABLE(bpf_parser_context_map, BPF_CONTEXT_HASH_BITS); + +/* Generate a simple hash key from pointer address */ +static inline unsigned int bpf_parser_context_hash_key(struct bpf_parser_context *ctx) +{ + return hash_ptr(ctx, BPF_CONTEXT_HASH_BITS); +} + +static void release_bpf_parser_context(struct kref *kref) +{ + struct bpf_parser_context *ctx = container_of(kref, struct bpf_parser_context, ref); + + if (!!ctx->buf) { + vfree(ctx->buf->buf); + kfree(ctx->buf); + } + spin_lock(&bpf_parser_context_lock); + hash_del(&ctx->hash_node); + spin_unlock(&bpf_parser_context_lock); + kfree(ctx); +} + +struct bpf_parser_context *alloc_bpf_parser_context(bpf_parser_handler_t func, + void *data) +{ + struct bpf_parser_context *ctx; + unsigned int key; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + ctx->func = func; + ctx->data = data; + kref_init(&ctx->ref); + key = bpf_parser_context_hash_key(ctx); + spin_lock(&bpf_parser_context_lock); + hash_add(bpf_parser_context_map, &ctx->hash_node, key); + spin_unlock(&bpf_parser_context_lock); + + return ctx; +} + +void put_bpf_parser_context(struct bpf_parser_context *ctx) +{ + if (!ctx) + return; + kref_put(&ctx->ref, release_bpf_parser_context); +} + +static struct bpf_parser_context *find_bpf_parser_context(unsigned long id) +{ + unsigned int key = bpf_parser_context_hash_key((struct bpf_parser_context *)id); + struct bpf_parser_context *ctx; + + spin_lock(&bpf_parser_context_lock); + hash_for_each_possible(bpf_parser_context_map, ctx, hash_node, key) { + if (ctx == (struct bpf_parser_context *)id) { + spin_unlock(&bpf_parser_context_lock); + return ctx; + } + } + spin_unlock(&bpf_parser_context_lock); + + return NULL; +} + +__bpf_kfunc_start_defs() + +__bpf_kfunc struct bpf_parser_context *bpf_get_parser_context(unsigned long id) +{ + struct bpf_parser_context *ctx; + + ctx = find_bpf_parser_context(id); + if (ctx) + kref_get(&ctx->ref); + + return ctx; +} + +__bpf_kfunc void bpf_put_parser_context(struct bpf_parser_context *ctx) +{ + put_bpf_parser_context(ctx); +} + +__bpf_kfunc void bpf_parser_context_release_dtor(void *ctx) +{ + put_bpf_parser_context(ctx); +} +CFI_NOSEAL(bpf_parser_context_release_dtor); + +__bpf_kfunc int bpf_buffer_parser(char *buf, int buf_sz, + struct bpf_parser_context *context) +{ + struct bpf_parser_buf *parser_buf; + int ret; + char *b; + + if (unlikely(context->func == NULL)) + return -EINVAL; + + b = __vmalloc(buf_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!b) + return -ENOMEM; + ret = copy_from_kernel_nofault(b, buf, buf_sz); + if (!!ret) { + vfree(b); + return ret; + } + + parser_buf = kmalloc(sizeof(struct bpf_parser_buf), GFP_KERNEL); + if (!parser_buf) { + vfree(b); + return -ENOMEM; + } + parser_buf->buf = b; + parser_buf->size = buf_sz; + context->buf = parser_buf; + ret = context->func(context); + + return ret; +} +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(buffer_parser_ids) +BTF_ID_FLAGS(func, bpf_get_parser_context, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_put_parser_context, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_buffer_parser, KF_TRUSTED_ARGS | KF_SLEEPABLE) +BTF_KFUNCS_END(buffer_parser_ids) + +static const struct btf_kfunc_id_set buffer_parser_kfunc_set = { + .owner = THIS_MODULE, + .set = &buffer_parser_ids, +}; + + +BTF_ID_LIST(buffer_parser_dtor_ids) +BTF_ID(struct, bpf_parser_context) +BTF_ID(func, bpf_parser_context_release_dtor) + +static int __init buffer_parser_kfunc_init(void) +{ + int ret; + const struct btf_id_dtor_kfunc buffer_parser_dtors[] = { + { + .btf_id = buffer_parser_dtor_ids[0], + .kfunc_btf_id = buffer_parser_dtor_ids[1] + }, + }; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &buffer_parser_kfunc_set); + return ret ?: register_btf_id_dtor_kfuncs(buffer_parser_dtors, + ARRAY_SIZE(buffer_parser_dtors), + THIS_MODULE); +} + +late_initcall(buffer_parser_kfunc_init); diff --git a/kernel/kexec_bpf/Makefile b/kernel/kexec_bpf/Makefile new file mode 100644 index 00000000000000..88e92eb910f643 --- /dev/null +++ b/kernel/kexec_bpf/Makefile @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: GPL-2.0 +OUTPUT := .output +CLANG ?= clang +LLC ?= llc +LLVM_STRIP ?= llvm-strip +DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool +BPFTOOL ?= $(DEFAULT_BPFTOOL) +LIBBPF_SRC := $(abspath ../../tools/lib/bpf) +BPFOBJ := $(OUTPUT)/libbpf.a +BPF_INCLUDE := $(OUTPUT) +INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../tools/lib) \ + -I$(abspath ../../tools/include/uapi) +CFLAGS := -g -Wall + +srctree := $(patsubst %/kernel/kexec_bpf,%,$(CURDIR)) +VMLINUX = $(srctree)/vmlinux + +abs_out := $(abspath $(OUTPUT)) +ifeq ($(V),1) +Q = +msg = +else +Q = @ +msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; +MAKEFLAGS += --no-print-directory +submake_extras := feature_display=0 +endif + +.DELETE_ON_ERROR: + +.PHONY: all clean + +all: $(OUTPUT) kexec_pe_parser_bpf.lskel.h + +clean: + $(call msg,CLEAN) + $(Q)rm -rf $(OUTPUT) kexec_pe_parser_bpf.lskel.h + +kexec_pe_parser_bpf.lskel.h: $(OUTPUT)/kexec_pe_parser_bpf.o | $(BPFTOOL) + $(call msg,GEN-SKEL,$@) + $(Q)$(BPFTOOL) gen skeleton -L $< > $@ + @# The following sed commands make opts_data[] and opts_insn[] visible in a file instead of only in a function. + @# And it removes the bytecode + $(Q) sed -i '/static const char opts_data\[\].*=/,/";$$/d' $@ + $(Q) sed -i '/static const char opts_insn\[\].*=/,/";$$/d' $@ + $(Q) sed -i \ + -e 's/opts\.data_sz = sizeof(opts_data) - 1;/opts.data_sz = opts_data_sz;/' \ + -e 's/opts\.insns_sz = sizeof(opts_insn) - 1;/opts.insns_sz = opts_insn_sz;/' $@ + $(Q) sed -i '7i static char *opts_data, *opts_insn;\nstatic unsigned int opts_data_sz, opts_insn_sz;' $@ + +$(OUTPUT)/vmlinux.h: $(VMLINUX) $(DEFAULT_BPFTOOL) $(BPFOBJ) | $(OUTPUT) + @$(BPFTOOL) btf dump file $(VMLINUX) format c > $(OUTPUT)/vmlinux.h + +$(OUTPUT)/kexec_pe_parser_bpf.o: kexec_pe_parser_bpf.c $(OUTPUT)/vmlinux.h $(BPFOBJ) | $(OUTPUT) + $(call msg,BPF,$@) + $(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES) \ + -c $(filter %.c,$^) -o $@ && \ + $(LLVM_STRIP) -g $@ + +$(OUTPUT): + $(call msg,MKDIR,$@) + $(Q)mkdir -p $(OUTPUT) + +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ + OUTPUT=$(abspath $(dir $@))/ $(abspath $@) + +$(DEFAULT_BPFTOOL): + $(Q)$(MAKE) $(submake_extras) -C ../../tools/bpf/bpftool \ + prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install diff --git a/kernel/kexec_bpf/kexec_pe_parser_bpf.c b/kernel/kexec_bpf/kexec_pe_parser_bpf.c new file mode 100644 index 00000000000000..e4a24192406731 --- /dev/null +++ b/kernel/kexec_bpf/kexec_pe_parser_bpf.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright (C) 2025, 2026 Red Hat, Inc + +#include "template.c" + +SEC("fentry.s/kexec_image_parser_anchor") +__attribute__((used)) int BPF_PROG(parse_pe, struct kexec_context *context, + unsigned long parser_id) +{ + return 0; +} diff --git a/kernel/kexec_bpf/kexec_pe_parser_bpf.lskel.h b/kernel/kexec_bpf/kexec_pe_parser_bpf.lskel.h new file mode 100644 index 00000000000000..db71b2150789db --- /dev/null +++ b/kernel/kexec_bpf/kexec_pe_parser_bpf.lskel.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* THIS FILE IS AUTOGENERATED BY BPFTOOL! */ +#ifndef __KEXEC_PE_PARSER_BPF_SKEL_H__ +#define __KEXEC_PE_PARSER_BPF_SKEL_H__ + +#include +static char *opts_data, *opts_insn; +static unsigned int opts_data_sz, opts_insn_sz; + +struct kexec_pe_parser_bpf { + struct bpf_loader_ctx ctx; + struct { + struct bpf_map_desc ringbuf_1; + struct bpf_map_desc ringbuf_2; + struct bpf_map_desc ringbuf_3; + struct bpf_map_desc ringbuf_4; + struct bpf_map_desc rodata; + struct bpf_map_desc data; + struct bpf_map_desc rodata_str1_1; + struct bpf_map_desc bss; + } maps; + struct { + struct bpf_prog_desc parse_pe; + } progs; + struct { + int parse_pe_fd; + } links; +}; + +static inline int +kexec_pe_parser_bpf__parse_pe__attach(struct kexec_pe_parser_bpf *skel) +{ + int prog_fd = skel->progs.parse_pe.prog_fd; + int fd = skel_raw_tracepoint_open(NULL, prog_fd); + + if (fd > 0) + skel->links.parse_pe_fd = fd; + return fd; +} + +static inline int +kexec_pe_parser_bpf__attach(struct kexec_pe_parser_bpf *skel) +{ + int ret = 0; + + ret = ret < 0 ? ret : kexec_pe_parser_bpf__parse_pe__attach(skel); + return ret < 0 ? ret : 0; +} + +static inline void +kexec_pe_parser_bpf__detach(struct kexec_pe_parser_bpf *skel) +{ + skel_closenz(skel->links.parse_pe_fd); +} +static void +kexec_pe_parser_bpf__destroy(struct kexec_pe_parser_bpf *skel) +{ + if (!skel) + return; + kexec_pe_parser_bpf__detach(skel); + skel_closenz(skel->progs.parse_pe.prog_fd); + skel_closenz(skel->maps.ringbuf_1.map_fd); + skel_closenz(skel->maps.ringbuf_2.map_fd); + skel_closenz(skel->maps.ringbuf_3.map_fd); + skel_closenz(skel->maps.ringbuf_4.map_fd); + skel_closenz(skel->maps.rodata.map_fd); + skel_closenz(skel->maps.data.map_fd); + skel_closenz(skel->maps.rodata_str1_1.map_fd); + skel_closenz(skel->maps.bss.map_fd); + skel_free(skel); +} +static inline struct kexec_pe_parser_bpf * +kexec_pe_parser_bpf__open(void) +{ + struct kexec_pe_parser_bpf *skel; + + skel = skel_alloc(sizeof(*skel)); + if (!skel) + goto cleanup; + skel->ctx.sz = (void *)&skel->links - (void *)skel; + return skel; +cleanup: + kexec_pe_parser_bpf__destroy(skel); + return NULL; +} + +static inline int +kexec_pe_parser_bpf__load(struct kexec_pe_parser_bpf *skel) +{ + struct bpf_load_and_run_opts opts = {}; + int err; + opts.ctx = (struct bpf_loader_ctx *)skel; + opts.data_sz = opts_data_sz; + opts.data = (void *)opts_data; + opts.insns_sz = opts_insn_sz; + opts.insns = (void *)opts_insn; + + err = bpf_load_and_run(&opts); + if (err < 0) + return err; + return 0; +} + +static inline struct kexec_pe_parser_bpf * +kexec_pe_parser_bpf__open_and_load(void) +{ + struct kexec_pe_parser_bpf *skel; + + skel = kexec_pe_parser_bpf__open(); + if (!skel) + return NULL; + if (kexec_pe_parser_bpf__load(skel)) { + kexec_pe_parser_bpf__destroy(skel); + return NULL; + } + return skel; +} + +__attribute__((unused)) static void +kexec_pe_parser_bpf__assert(struct kexec_pe_parser_bpf *s __attribute__((unused))) +{ +#ifdef __cplusplus +#define _Static_assert static_assert +#endif +#ifdef __cplusplus +#undef _Static_assert +#endif +} + +#endif /* __KEXEC_PE_PARSER_BPF_SKEL_H__ */ diff --git a/kernel/kexec_bpf/template.c b/kernel/kexec_bpf/template.c new file mode 100644 index 00000000000000..9f17a4952ecd49 --- /dev/null +++ b/kernel/kexec_bpf/template.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright (C) 2026 Red Hat, Inc +// +// Original file: kernel/kexec_bpf/template.c +// +#include "vmlinux.h" +#include +#include +#include +#include + +/* Command to kernel kexec bpf loader, which is defined on the stream */ +#define KEXEC_BPF_CMD_DECOMPRESS 0x1 +#define KEXEC_BPF_CMD_COPY 0x2 + +#define KEXEC_BPF_SUBCMD_KERNEL 0x1 +#define KEXEC_BPF_SUBCMD_INITRD 0x2 +#define KEXEC_BPF_SUBCMD_CMDLINE 0x3 + +/* + * The ringbufs can have different capacity. But only four ringbuf are provided. + */ +#ifndef RINGBUF1_SIZE +#define RINGBUF1_SIZE 4 +#endif +#ifndef RINGBUF2_SIZE +#define RINGBUF2_SIZE 4 +#endif +#ifndef RINGBUF3_SIZE +#define RINGBUF3_SIZE 4 +#endif +#ifndef RINGBUF4_SIZE +#define RINGBUF4_SIZE 4 +#endif + +/* ringbuf is safe since the user space has no write access to them */ +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, RINGBUF1_SIZE); +} ringbuf_1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, RINGBUF2_SIZE); +} ringbuf_2 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, RINGBUF3_SIZE); +} ringbuf_3 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, RINGBUF4_SIZE); +} ringbuf_4 SEC(".maps"); + +char LICENSE[] SEC("license") = "GPL"; + +/* + * This function ensures that the sections .rodata, .data, .rodata.str1.1 and .bss + * are created for a bpf prog. + */ +static const char dummy_rodata[16] __attribute__((used)) = "rodata"; +static char dummy_data[16] __attribute__((used)) = "data"; +static char *dummy_mergeable_str __attribute__((used)) = ".rodata.str1.1"; +static char dummy_bss[16] __attribute__((used)); + diff --git a/kernel/kexec_bpf_loader.c b/kernel/kexec_bpf_loader.c new file mode 100644 index 00000000000000..bbf6bcae24970a --- /dev/null +++ b/kernel/kexec_bpf_loader.c @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Kexec image bpf section helpers + + * Copyright (C) 2025, 2026 Red Hat, Inc + */ + +#define pr_fmt(fmt) "kexec_file(Image): " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "kexec_internal.h" + +#include "kexec_bpf/kexec_pe_parser_bpf.lskel.h" + +static struct kexec_pe_parser_bpf *pe_parser; + +static void *get_symbol_from_elf(const char *elf_data, size_t elf_size, + const char *symbol_name, unsigned int *symbol_size) +{ + Elf_Ehdr *ehdr = (Elf_Ehdr *)elf_data; + Elf_Shdr *shdr, *dst_shdr; + const Elf_Sym *sym; + void *symbol_data; + + if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) { + pr_err("Not a valid ELF file\n"); + return NULL; + } + + sym = elf_find_symbol(ehdr, symbol_name); + if (!sym) + return NULL; + shdr = (struct elf_shdr *)(elf_data + ehdr->e_shoff); + dst_shdr = &shdr[sym->st_shndx]; + symbol_data = (void *)(elf_data + dst_shdr->sh_offset + sym->st_value); + *symbol_size = sym->st_size; + + return symbol_data; +} + +/* Load a ELF */ +static int arm_bpf_prog(char *bpf_elf, unsigned long sz) +{ + opts_data = get_symbol_from_elf(bpf_elf, sz, "opts_data", &opts_data_sz); + opts_insn = get_symbol_from_elf(bpf_elf, sz, "opts_insn", &opts_insn_sz); + if (!opts_data || !opts_insn) + return -1; + /* + * When light skeleton generates opts_data[] and opts_insn[], it appends a + * NULL terminator at the end of string + */ + opts_data_sz = opts_data_sz - 1; + opts_insn_sz = opts_insn_sz - 1; + + pe_parser = kexec_pe_parser_bpf__open_and_load(); + if (!pe_parser) + return -1; + kexec_pe_parser_bpf__attach(pe_parser); + + return 0; +} + +static void disarm_bpf_prog(void) +{ + kexec_pe_parser_bpf__destroy(pe_parser); + pe_parser = NULL; + opts_data = NULL; + opts_insn = NULL; +} + +struct kexec_context { + bool kdump; + char *kernel; + int kernel_sz; + char *initrd; + int initrd_sz; + char *cmdline; + int cmdline_sz; +}; + +void kexec_image_parser_anchor(struct kexec_context *context, + unsigned long parser_id); + +/* + * optimize("O0") prevents inline, compiler constant propagation + * + * Let bpf be the program context pointer so that it will not be spilled into + * stack. + */ +__attribute__((used, optimize("O0"))) void kexec_image_parser_anchor( + struct kexec_context *context, + unsigned long parser_id) +{ + /* + * To prevent linker from Identical Code Folding (ICF) with kexec_image_parser_anchor, + * making them have different code. + */ + volatile int dummy = 0; + + dummy += 1; +} + + +BTF_KFUNCS_START(kexec_modify_return_ids) +BTF_ID_FLAGS(func, kexec_image_parser_anchor, KF_SLEEPABLE) +BTF_KFUNCS_END(kexec_modify_return_ids) + +static const struct btf_kfunc_id_set kexec_modify_return_set = { + .owner = THIS_MODULE, + .set = &kexec_modify_return_ids, +}; + +static int __init kexec_bpf_prog_run_init(void) +{ + return register_btf_fmodret_id_set(&kexec_modify_return_set); +} +late_initcall(kexec_bpf_prog_run_init); + +#define KEXEC_BPF_CMD_DECOMPRESS 0x1 +#define KEXEC_BPF_CMD_COPY 0x2 + +#define KEXEC_BPF_SUBCMD_KERNEL 0x1 +#define KEXEC_BPF_SUBCMD_INITRD 0x2 +#define KEXEC_BPF_SUBCMD_CMDLINE 0x3 + +struct cmd_hdr { + uint16_t cmd; + uint16_t subcmd; + uint32_t payload_len; +} __packed; + + +/* Max decompressed size is capped at 512M */ +#define MAX_UNCOMPRESSED_BUF_SIZE (1 << 29) +#define CHUNK_SIZE (1 << 23) + +struct decompress_mem_allocator { + void *chunk_start; + unsigned int chunk_size; + void *chunk_cur; + unsigned int next_idx; + char **chunk_base_addr; +}; + +/* + * This global allocator for decompression is protected by kexec lock. + */ +static struct decompress_mem_allocator dcmpr_allocator; + +/* + * Set up an active chunk to hold partial decompressed data. + */ +static char *allocate_chunk_memory(void) +{ + struct decompress_mem_allocator *a = &dcmpr_allocator; + char *p; + + if (unlikely((a->next_idx * a->chunk_size >= MAX_UNCOMPRESSED_BUF_SIZE))) + return NULL; + + p = __vmalloc(a->chunk_size, GFP_KERNEL | __GFP_ACCOUNT); + if (!p) + return NULL; + a->chunk_base_addr[a->next_idx++] = p; + a->chunk_start = a->chunk_cur = p; + + return p; +} + +static int merge_decompressed_data(struct decompress_mem_allocator *a, + char **out, unsigned int *size) +{ + unsigned int last_chunk_sz = a->chunk_cur - a->chunk_start; + unsigned long total_sz; + char *dst, *cur_dst; + int i; + + total_sz = (a->next_idx - 1) * a->chunk_size + last_chunk_sz; + cur_dst = dst = __vmalloc(total_sz, GFP_KERNEL | __GFP_ACCOUNT); + if (!dst) + return -ENOMEM; + + for (i = 0; i < a->next_idx - 1; i++) { + memcpy(cur_dst, a->chunk_base_addr[i], a->chunk_size); + cur_dst += a->chunk_size; + vfree(a->chunk_base_addr[i]); + } + + memcpy(cur_dst, a->chunk_base_addr[i], last_chunk_sz); + vfree(a->chunk_base_addr[i]); + *out = dst; + *size = total_sz; + + return 0; +} + +static int decompress_mem_allocator_init( + struct decompress_mem_allocator *a, + unsigned int chunk_size) +{ + unsigned long sz = (MAX_UNCOMPRESSED_BUF_SIZE / chunk_size) * sizeof(void *); + char *buf; + + a->chunk_base_addr = __vmalloc(sz, GFP_KERNEL | __GFP_ACCOUNT); + if (!a->chunk_base_addr) + return -ENOMEM; + + /* Pre-allocate the memory for the first chunk */ + buf = __vmalloc(chunk_size, GFP_KERNEL | __GFP_ACCOUNT); + if (!buf) { + vfree(a->chunk_base_addr); + return -ENOMEM; + } + a->chunk_base_addr[0] = buf; + a->chunk_start = a->chunk_cur = buf; + a->chunk_size = chunk_size; + a->next_idx = 1; + return 0; +} + +static void decompress_mem_allocator_fini(struct decompress_mem_allocator *a) +{ + vfree(a->chunk_base_addr); +} + +/* + * This is a callback for decompress_fn. + * + * It copies the partial decompressed content in [buf, buf + len) to dst. If the + * active chunk is not large enough, retire it and activate a new chunk to hold + * the remaining data. + */ +static long flush(void *buf, unsigned long len) +{ + struct decompress_mem_allocator *a = &dcmpr_allocator; + long free, copied = 0; + + if (unlikely(len > a->chunk_size)) { + pr_info("Chunk size is too small to hold decompressed data\n"); + return -1; + } + free = a->chunk_start + a->chunk_size - a->chunk_cur; + BUG_ON(free < 0); + if (free < len) { + memcpy(a->chunk_cur, buf, free); + copied += free; + a->chunk_cur += free; + buf += free; + len -= free; + a->chunk_start = a->chunk_cur = allocate_chunk_memory(); + if (unlikely(!a->chunk_start)) { + pr_info("Decompression runs out of memory\n"); + return -1; + } + } + memcpy(a->chunk_cur, buf, len); + copied += len; + a->chunk_cur += len; + return copied; +} + +static int parser_cmd_decompress(char *compressed_data, int image_gz_sz, + char **out_buf, int *out_sz, struct kexec_context *ctx) +{ + struct decompress_mem_allocator *a = &dcmpr_allocator; + decompress_fn decompressor; + const char *name; + int ret; + + decompress_mem_allocator_init(a, CHUNK_SIZE); + decompressor = decompress_method(compressed_data, image_gz_sz, &name); + if (!decompressor) { + pr_err("Can not find decompress method\n"); + return -1; + } + pr_debug("Find decompressing method: %s, compressed sz:0x%x\n", + name, image_gz_sz); + ret = decompressor(compressed_data, image_gz_sz, NULL, flush, + NULL, NULL, NULL); + if (!!ret) + goto err; + ret = merge_decompressed_data(a, out_buf, out_sz); + +err: + decompress_mem_allocator_fini(a); + + return ret; +} + +static int kexec_buff_parser(struct bpf_parser_context *parser) +{ + struct bpf_parser_buf *pbuf = parser->buf; + struct kexec_context *ctx = (struct kexec_context *)parser->data; + struct cmd_hdr *cmd = (struct cmd_hdr *)pbuf->buf; + char *decompressed_buf, *buf, *p; + int decompressed_sz, ret; + + buf = pbuf->buf + sizeof(struct cmd_hdr); + if (cmd->payload_len + sizeof(struct cmd_hdr) > pbuf->size) { + pr_info("Invalid payload size:0x%x, while buffer size:0x%x\n", + cmd->payload_len, pbuf->size); + return -EINVAL; + } + switch (cmd->cmd) { + case KEXEC_BPF_CMD_DECOMPRESS: + ret = parser_cmd_decompress(buf, cmd->payload_len, &decompressed_buf, + &decompressed_sz, ctx); + if (!ret) { + switch (cmd->subcmd) { + case KEXEC_BPF_SUBCMD_KERNEL: + vfree(ctx->kernel); + ctx->kernel = decompressed_buf; + ctx->kernel_sz = decompressed_sz; + break; + default: + break; + } + } + break; + case KEXEC_BPF_CMD_COPY: + p = __vmalloc(cmd->payload_len, GFP_KERNEL | __GFP_ACCOUNT); + if (!p) + return -ENOMEM; + memcpy(p, buf, cmd->payload_len); + switch (cmd->subcmd) { + case KEXEC_BPF_SUBCMD_KERNEL: + vfree(ctx->kernel); + ctx->kernel = p; + ctx->kernel_sz = cmd->payload_len; + break; + case KEXEC_BPF_SUBCMD_INITRD: + vfree(ctx->initrd); + ctx->initrd = p; + ctx->initrd_sz = cmd->payload_len; + break; + case KEXEC_BPF_SUBCMD_CMDLINE: + vfree(ctx->cmdline); + ctx->cmdline = p; + ctx->cmdline_sz = cmd->payload_len; + break; + default: + vfree(p); + break; + } + break; + default: + break; + } + + return 0; +} + +/* At present, only PE format file with .bpf section is supported */ +#define file_has_bpf_section pe_has_bpf_section +#define file_get_section pe_get_section + +int decompose_kexec_image(struct kimage *image, int extended_fd) +{ + struct kexec_context context = { 0 }; + struct bpf_parser_context *bpf; + unsigned long kernel_sz, bpf_sz; + char *kernel_start, *bpf_start; + int ret = 0; + + if (image->type != KEXEC_TYPE_CRASH) + context.kdump = false; + else + context.kdump = true; + + kernel_start = image->kernel_buf; + kernel_sz = image->kernel_buf_len; + + while (file_has_bpf_section(kernel_start, kernel_sz)) { + + bpf = alloc_bpf_parser_context(kexec_buff_parser, &context); + if (!bpf) + return -ENOMEM; + file_get_section((const char *)kernel_start, ".bpf", &bpf_start, &bpf_sz); + if (!!bpf_sz) { + /* load and attach bpf-prog */ + ret = arm_bpf_prog(bpf_start, bpf_sz); + if (ret) { + put_bpf_parser_context(bpf); + pr_err("Fail to load .bpf section\n"); + goto err; + } + } + context.kernel = kernel_start; + context.kernel_sz = kernel_sz; + /* bpf-prog fentry, which handle above buffers. */ + kexec_image_parser_anchor(&context, (unsigned long)bpf); + + /* + * Container may be nested and should be unfold one by one. + * The former bpf-prog should prepare 'kernel', 'initrd', + * 'cmdline' for the next phase by calling kexec_buff_parser() + */ + kernel_start = context.kernel; + kernel_sz = context.kernel_sz; + + /* + * detach the current bpf-prog from their attachment points. + */ + disarm_bpf_prog(); + put_bpf_parser_context(bpf); + } + + /* + * image's kernel_buf, initrd_buf, cmdline_buf are set. Now they should + * be updated to the new content. + */ + image->kernel_buf = context.kernel; + image->kernel_buf_len = context.kernel_sz; + image->initrd_buf = context.initrd; + image->initrd_buf_len = context.initrd_sz; + image->cmdline_buf = context.cmdline; + image->cmdline_buf_len = context.cmdline_sz; + + return 0; +err: + vfree(context.kernel); + vfree(context.initrd); + vfree(context.cmdline); + return ret; +} + diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index eb62a97942428c..48b4eee8559c93 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -231,18 +231,26 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, kexec_dprintk("kernel: %p kernel_size: %#lx\n", image->kernel_buf, image->kernel_buf_len); - /* Call arch image probe handlers */ - ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, - image->kernel_buf_len); - if (ret) - goto out; - #ifdef CONFIG_KEXEC_SIG ret = kimage_validate_signature(image); if (ret) goto out; #endif + + if (IS_ENABLED(CONFIG_KEXEC_BPF)) + decompose_kexec_image(image, initrd_fd); + + /* + * From this point, the kexec subsystem handle the kernel boot protocol. + * + * Call arch image probe handlers + */ + ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, + image->kernel_buf_len); + if (ret) + goto out; + /* It is possible that there no initramfs is being loaded */ if (!(flags & KEXEC_FILE_NO_INITRAMFS)) { ret = kernel_read_file_from_fd(initrd_fd, 0, &image->initrd_buf, @@ -881,6 +889,51 @@ static int kexec_calculate_store_digests(struct kimage *image) return ret; } +#if defined(CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY) || defined(CONFIG_KEXEC_BPF) +const Elf_Sym *elf_find_symbol(const Elf_Ehdr *ehdr, const char *name) +{ + const Elf_Shdr *sechdrs; + const Elf_Sym *syms; + const char *strtab; + int i, k; + + sechdrs = (void *)ehdr + ehdr->e_shoff; + + for (i = 0; i < ehdr->e_shnum; i++) { + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; + + if (sechdrs[i].sh_link >= ehdr->e_shnum) + /* Invalid strtab section number */ + continue; + strtab = (void *)ehdr + sechdrs[sechdrs[i].sh_link].sh_offset; + syms = (void *)ehdr + sechdrs[i].sh_offset; + + /* Go through symbols for a match */ + for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) { + if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL) + continue; + + if (strcmp(strtab + syms[k].st_name, name) != 0) + continue; + + if (syms[k].st_shndx == SHN_UNDEF || + syms[k].st_shndx >= ehdr->e_shnum) { + pr_debug("Symbol: %s has bad section index %d.\n", + name, syms[k].st_shndx); + return NULL; + } + + /* Found the symbol we are looking for */ + return &syms[k]; + } + } + + return NULL; +} + +#endif + #ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY /* * kexec_purgatory_setup_kbuf - prepare buffer to load purgatory. @@ -1138,49 +1191,10 @@ int kexec_load_purgatory(struct kimage *image, struct kexec_buf *kbuf) static const Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi, const char *name) { - const Elf_Shdr *sechdrs; - const Elf_Ehdr *ehdr; - const Elf_Sym *syms; - const char *strtab; - int i, k; - if (!pi->ehdr) return NULL; - ehdr = pi->ehdr; - sechdrs = (void *)ehdr + ehdr->e_shoff; - - for (i = 0; i < ehdr->e_shnum; i++) { - if (sechdrs[i].sh_type != SHT_SYMTAB) - continue; - - if (sechdrs[i].sh_link >= ehdr->e_shnum) - /* Invalid strtab section number */ - continue; - strtab = (void *)ehdr + sechdrs[sechdrs[i].sh_link].sh_offset; - syms = (void *)ehdr + sechdrs[i].sh_offset; - - /* Go through symbols for a match */ - for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) { - if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL) - continue; - - if (strcmp(strtab + syms[k].st_name, name) != 0) - continue; - - if (syms[k].st_shndx == SHN_UNDEF || - syms[k].st_shndx >= ehdr->e_shnum) { - pr_debug("Symbol: %s has bad section index %d.\n", - name, syms[k].st_shndx); - return NULL; - } - - /* Found the symbol we are looking for */ - return &syms[k]; - } - } - - return NULL; + return elf_find_symbol(pi->ehdr, name); } void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name) diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h index 228bb88c018bce..2e8e0fedbe2a9a 100644 --- a/kernel/kexec_internal.h +++ b/kernel/kexec_internal.h @@ -36,6 +36,11 @@ static inline void kexec_unlock(void) void kimage_file_post_load_cleanup(struct kimage *image); extern char kexec_purgatory[]; extern size_t kexec_purgatory_size; +extern bool pe_has_bpf_section(const char *file_buf, unsigned long pe_sz); +extern int pe_get_section(const char *file_buf, const char *sect_name, + char **sect_start, unsigned long *sect_sz); +extern int decompose_kexec_image(struct kimage *image, int extended_fd); +extern const Elf_Sym *elf_find_symbol(const Elf_Ehdr *ehdr, const char *name); #else /* CONFIG_KEXEC_FILE */ static inline void kimage_file_post_load_cleanup(struct kimage *image) { } #endif /* CONFIG_KEXEC_FILE */ diff --git a/kernel/kexec_uefi_app.c b/kernel/kexec_uefi_app.c new file mode 100644 index 00000000000000..7f0caf171d326c --- /dev/null +++ b/kernel/kexec_uefi_app.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * UEFI appilication file helpers + + * Copyright (C) 2025 Red Hat, Inc + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "kexec_internal.h" + +/* + * The UEFI Terse Executable (TE) image has MZ header. + */ +static bool is_valid_pe(const char *kernel_buf, unsigned long kernel_len) +{ + struct mz_hdr *mz; + struct pe_hdr *pe; + + if (!kernel_buf) + return false; + mz = (struct mz_hdr *)kernel_buf; + if (mz->magic != IMAGE_DOS_SIGNATURE) + return false; + pe = (struct pe_hdr *)(kernel_buf + mz->peaddr); + if (pe->magic != IMAGE_NT_SIGNATURE) + return false; + if (pe->opt_hdr_size == 0) { + pr_err("optional header is missing\n"); + return false; + } + return true; +} + +bool pe_has_bpf_section(const char *file_buf, unsigned long pe_sz) +{ + char *sect_start = NULL; + unsigned long sect_sz = 0; + int ret; + + if (!is_valid_pe(file_buf, pe_sz)) + return false; + ret = pe_get_section(file_buf, ".bpf", §_start, §_sz); + if (ret < 0) + return false; + return true; +} + +int pe_get_section(const char *file_buf, const char *sect_name, + char **sect_start, unsigned long *sect_sz) +{ + struct pe_hdr *pe_hdr; + struct pe32plus_opt_hdr *opt_hdr; + struct section_header *sect_hdr; + int section_nr, i; + struct mz_hdr *mz = (struct mz_hdr *)file_buf; + + *sect_start = NULL; + *sect_sz = 0; + pe_hdr = (struct pe_hdr *)(file_buf + mz->peaddr); + section_nr = pe_hdr->sections; + opt_hdr = (struct pe32plus_opt_hdr *)(file_buf + mz->peaddr + + sizeof(struct pe_hdr)); + sect_hdr = (struct section_header *)((char *)opt_hdr + + pe_hdr->opt_hdr_size); + + for (i = 0; i < section_nr; i++) { + if (strcmp(sect_hdr->name, sect_name) == 0) { + *sect_start = (char *)file_buf + sect_hdr->data_addr; + *sect_sz = sect_hdr->raw_data_size; + return 0; + } + sect_hdr++; + } + + return -1; +} diff --git a/lib/Kconfig b/lib/Kconfig index 2923924bea78cb..4424ae14dcf129 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -165,6 +165,12 @@ config RANDOM32_SELFTEST # # compression support is select'ed if needed # +config KEEP_DECOMPRESSOR + bool "keeps the decompress routines after kernel initialization" + default n + help + This option keeps the decompress routines after kernel initialization + config 842_COMPRESS select CRC32 tristate diff --git a/lib/decompress.c b/lib/decompress.c index 7785471586c627..29d4c749f1fc48 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -6,7 +6,7 @@ */ #include - +#include #include #include #include @@ -48,7 +48,7 @@ struct compress_format { decompress_fn decompressor; }; -static const struct compress_format compressed_formats[] __initconst = { +static const struct compress_format compressed_formats[] INITCONST = { { .magic = {0x1f, 0x8b}, .name = "gzip", .decompressor = gunzip }, { .magic = {0x1f, 0x9e}, .name = "gzip", .decompressor = gunzip }, { .magic = {0x42, 0x5a}, .name = "bzip2", .decompressor = bunzip2 }, @@ -60,7 +60,7 @@ static const struct compress_format compressed_formats[] __initconst = { { /* sentinel */ } }; -decompress_fn __init decompress_method(const unsigned char *inbuf, long len, +decompress_fn INIT decompress_method(const unsigned char *inbuf, long len, const char **name) { const struct compress_format *cf; diff --git a/scripts/coccicheck b/scripts/coccicheck index 0e6bc5a10320c9..89d591af5f3e7b 100755 --- a/scripts/coccicheck +++ b/scripts/coccicheck @@ -270,7 +270,11 @@ fi if [ "$COCCI" = "" ] ; then for f in `find $srctree/scripts/coccinelle/ -name '*.cocci' -type f | sort`; do - coccinelle $f + if grep -q "virtual[[:space:]]\+$MODE" "$f"; then + coccinelle $f + else + echo "warning: Skipping $f as it does not match mode '$MODE'" + fi done else coccinelle $COCCI diff --git a/scripts/coccinelle/api/pm_runtime.cocci b/scripts/coccinelle/api/pm_runtime.cocci index bf128ccae92101..b720489418fa4c 100644 --- a/scripts/coccinelle/api/pm_runtime.cocci +++ b/scripts/coccinelle/api/pm_runtime.cocci @@ -109,5 +109,5 @@ p2 << r.p2; pm_runtime_api << r.pm_runtime_api; @@ -msg = "%s returns < 0 as error. Unecessary IS_ERR_VALUE at line %s" % (pm_runtime_api, p2[0].line) +msg = "%s returns < 0 as error. Unnecessary IS_ERR_VALUE at line %s" % (pm_runtime_api, p2[0].line) coccilib.report.print_report(p1[0],msg) diff --git a/tools/kexec/Makefile b/tools/kexec/Makefile new file mode 100644 index 00000000000000..acb045367adfad --- /dev/null +++ b/tools/kexec/Makefile @@ -0,0 +1,91 @@ +# SPDX-License-Identifier: GPL-2.0 + +# Ensure Kbuild variables are available +include ../scripts/Makefile.include + +srctree := $(patsubst %/tools/kexec,%,$(CURDIR)) +VMLINUX = $(srctree)/vmlinux +TOOLSDIR := $(srctree)/tools +LIBDIR := $(TOOLSDIR)/lib +BPFDIR := $(LIBDIR)/bpf +ARCH ?= $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/ -e s/riscv64/riscv/ -e s/loongarch.*/loongarch/) +# At present, zboot image format is used by arm64, riscv, loongarch +# And arch/$(ARCH)/boot/vmlinux.bin is the uncompressed file instead of arch/$(ARCH)/boot/Image +ifeq ($(ARCH),$(filter $(ARCH),arm64 riscv loongarch)) + EFI_IMAGE := $(srctree)/arch/$(ARCH)/boot/vmlinuz.efi + KERNEL_IMAGE := $(srctree)/arch/$(ARCH)/boot/vmlinux.bin +else + @echo "Unsupported architecture: $(ARCH)" + @exit 1 +endif + + +CC = clang +CFLAGS = -O2 +BPF_PROG_CFLAGS = -g -O2 -target bpf -Wall -I $(BPFDIR) -I . +BPFTOOL = bpftool + +# List of generated target files +HEADERS = vmlinux.h bpf_helper_defs.h image_size.h +ZBOOT_TARGETS = zboot_parser_bpf.o zboot_parser_bpf.lskel.h bytecode.c bytecode.o zboot_image_builder zboot.efi + + +# Targets +zboot: $(HEADERS) $(ZBOOT_TARGETS) + +# Rule to generate vmlinux.h from vmlinux +vmlinux.h: $(VMLINUX) + @command -v $(BPFTOOL) >/dev/null 2>&1 || { echo >&2 "$(BPFTOOL) is required but not found. Please install it."; exit 1; } + @$(BPFTOOL) btf dump file $(VMLINUX) format c > vmlinux.h + +bpf_helper_defs.h: $(srctree)/tools/include/uapi/linux/bpf.h + @$(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \ + --file $(srctree)/tools/include/uapi/linux/bpf.h > bpf_helper_defs.h + +image_size.h: $(KERNEL_IMAGE) + @{ \ + if [ ! -f "$(KERNEL_IMAGE)" ]; then \ + echo "Error: File '$(KERNEL_IMAGE)' does not exist"; \ + exit 1; \ + fi; \ + FILE_SIZE=$$(stat -c '%s' "$(KERNEL_IMAGE)" 2>/dev/null); \ + POWER=4096; \ + while [ $$POWER -le $$FILE_SIZE ]; do \ + POWER=$$((POWER * 2)); \ + done; \ + RINGBUF_SIZE=$$POWER; \ + echo "#define IMAGE_SIZE_POWER2_ALIGN $$RINGBUF_SIZE" > $@; \ + echo "#define IMAGE_SIZE $$FILE_SIZE" >> $@; \ + } + + +# Rule to generate zboot_parser_bpf.o, depends on vmlinux.h +zboot_parser_bpf.o: zboot_parser_bpf.c vmlinux.h bpf_helper_defs.h + @$(CC) $(BPF_PROG_CFLAGS) -c zboot_parser_bpf.c -o zboot_parser_bpf.o + +# Generate zboot_parser_bpf.lskel.h using bpftool +zboot_parser_bpf.lskel.h: zboot_parser_bpf.o + @$(BPFTOOL) gen skeleton -L zboot_parser_bpf.o > zboot_parser_bpf.lskel.h + +# Then, extract the opts_data[] and opts_insn[] arrays and remove 'static' +# keywords to avoid being optimized away. +bytecode.c: zboot_parser_bpf.lskel.h + @sed -n '/static const char opts_data\[\]/,/;/p' zboot_parser_bpf.lskel.h | sed 's/static const/const/' > $@ + @sed -n '/static const char opts_insn\[\]/,/;/p' zboot_parser_bpf.lskel.h | sed 's/static const/const/' >> $@ + +bytecode.o: bytecode.c + @$(CC) -c $< -o $@ + +# Rule to build zboot_image_builder executable +zboot_image_builder: zboot_image_builder.c + @$(CC) $(CFLAGS) $< -o $@ + +zboot.efi: zboot_image_builder bytecode.o + @chmod +x zboot_image_builder + @./zboot_image_builder $(EFI_IMAGE) bytecode.o $@ + +# Clean up generated files +clean: + @rm -f $(HEADERS) $(ZBOOT_TARGETS) + +.PHONY: all clean diff --git a/tools/kexec/pe.h b/tools/kexec/pe.h new file mode 100644 index 00000000000000..c2273d3fc3bb31 --- /dev/null +++ b/tools/kexec/pe.h @@ -0,0 +1,177 @@ +/* + * Extract from linux kernel include/linux/pe.h + */ + +#ifndef __PE_H__ +#define __PE_H__ + +#define IMAGE_DOS_SIGNATURE 0x5a4d /* "MZ" */ +#define IMAGE_NT_SIGNATURE 0x00004550 /* "PE\0\0" */ + +struct mz_hdr { + uint16_t magic; /* MZ_MAGIC */ + uint16_t lbsize; /* size of last used block */ + uint16_t blocks; /* pages in file, 0x3 */ + uint16_t relocs; /* relocations */ + uint16_t hdrsize; /* header size in "paragraphs" */ + uint16_t min_extra_pps; /* .bss */ + uint16_t max_extra_pps; /* runtime limit for the arena size */ + uint16_t ss; /* relative stack segment */ + uint16_t sp; /* initial %sp register */ + uint16_t checksum; /* word checksum */ + uint16_t ip; /* initial %ip register */ + uint16_t cs; /* initial %cs relative to load segment */ + uint16_t reloc_table_offset; /* offset of the first relocation */ + uint16_t overlay_num; /* overlay number. set to 0. */ + uint16_t reserved0[4]; /* reserved */ + uint16_t oem_id; /* oem identifier */ + uint16_t oem_info; /* oem specific */ + uint16_t reserved1[10]; /* reserved */ + uint32_t peaddr; /* address of pe header */ + char message[]; /* message to print */ +}; + +struct pe_hdr { + uint32_t magic; /* PE magic */ + uint16_t machine; /* machine type */ + uint16_t sections; /* number of sections */ + uint32_t timestamp; /* time_t */ + uint32_t symbol_table; /* symbol table offset */ + uint32_t symbols; /* number of symbols */ + uint16_t opt_hdr_size; /* size of optional header */ + uint16_t flags; /* flags */ +}; + +/* the fact that pe32 isn't padded where pe32+ is 64-bit means union won't + * work right. vomit. */ +struct pe32_opt_hdr { + /* "standard" header */ + uint16_t magic; /* file type */ + uint8_t ld_major; /* linker major version */ + uint8_t ld_minor; /* linker minor version */ + uint32_t text_size; /* size of text section(s) */ + uint32_t data_size; /* size of data section(s) */ + uint32_t bss_size; /* size of bss section(s) */ + uint32_t entry_point; /* file offset of entry point */ + uint32_t code_base; /* relative code addr in ram */ + uint32_t data_base; /* relative data addr in ram */ + /* "windows" header */ + uint32_t image_base; /* preferred load address */ + uint32_t section_align; /* alignment in bytes */ + uint32_t file_align; /* file alignment in bytes */ + uint16_t os_major; /* major OS version */ + uint16_t os_minor; /* minor OS version */ + uint16_t image_major; /* major image version */ + uint16_t image_minor; /* minor image version */ + uint16_t subsys_major; /* major subsystem version */ + uint16_t subsys_minor; /* minor subsystem version */ + uint32_t win32_version; /* reserved, must be 0 */ + uint32_t image_size; /* image size */ + uint32_t header_size; /* header size rounded up to + file_align */ + uint32_t csum; /* checksum */ + uint16_t subsys; /* subsystem */ + uint16_t dll_flags; /* more flags! */ + uint32_t stack_size_req;/* amt of stack requested */ + uint32_t stack_size; /* amt of stack required */ + uint32_t heap_size_req; /* amt of heap requested */ + uint32_t heap_size; /* amt of heap required */ + uint32_t loader_flags; /* reserved, must be 0 */ + uint32_t data_dirs; /* number of data dir entries */ +}; + +struct pe32plus_opt_hdr { + uint16_t magic; /* file type */ + uint8_t ld_major; /* linker major version */ + uint8_t ld_minor; /* linker minor version */ + uint32_t text_size; /* size of text section(s) */ + uint32_t data_size; /* size of data section(s) */ + uint32_t bss_size; /* size of bss section(s) */ + uint32_t entry_point; /* file offset of entry point */ + uint32_t code_base; /* relative code addr in ram */ + /* "windows" header */ + uint64_t image_base; /* preferred load address */ + uint32_t section_align; /* alignment in bytes */ + uint32_t file_align; /* file alignment in bytes */ + uint16_t os_major; /* major OS version */ + uint16_t os_minor; /* minor OS version */ + uint16_t image_major; /* major image version */ + uint16_t image_minor; /* minor image version */ + uint16_t subsys_major; /* major subsystem version */ + uint16_t subsys_minor; /* minor subsystem version */ + uint32_t win32_version; /* reserved, must be 0 */ + uint32_t image_size; /* image size */ + uint32_t header_size; /* header size rounded up to + file_align */ + uint32_t csum; /* checksum */ + uint16_t subsys; /* subsystem */ + uint16_t dll_flags; /* more flags! */ + uint64_t stack_size_req;/* amt of stack requested */ + uint64_t stack_size; /* amt of stack required */ + uint64_t heap_size_req; /* amt of heap requested */ + uint64_t heap_size; /* amt of heap required */ + uint32_t loader_flags; /* reserved, must be 0 */ + uint32_t data_dirs; /* number of data dir entries */ +}; + +struct data_dirent { + uint32_t virtual_address; /* relative to load address */ + uint32_t size; +}; + +struct data_directory { + struct data_dirent exports; /* .edata */ + struct data_dirent imports; /* .idata */ + struct data_dirent resources; /* .rsrc */ + struct data_dirent exceptions; /* .pdata */ + struct data_dirent certs; /* certs */ + struct data_dirent base_relocations; /* .reloc */ + struct data_dirent debug; /* .debug */ + struct data_dirent arch; /* reservered */ + struct data_dirent global_ptr; /* global pointer reg. Size=0 */ + struct data_dirent tls; /* .tls */ + struct data_dirent load_config; /* load configuration structure */ + struct data_dirent bound_imports; /* no idea */ + struct data_dirent import_addrs; /* import address table */ + struct data_dirent delay_imports; /* delay-load import table */ + struct data_dirent clr_runtime_hdr; /* .cor (object only) */ + struct data_dirent reserved; +}; + +struct section_header { + char name[8]; /* name or "/12\0" string tbl offset */ + uint32_t virtual_size; /* size of loaded section in ram */ + uint32_t virtual_address; /* relative virtual address */ + uint32_t raw_data_size; /* size of the section */ + uint32_t data_addr; /* file pointer to first page of sec */ + uint32_t relocs; /* file pointer to relocation entries */ + uint32_t line_numbers; /* line numbers! */ + uint16_t num_relocs; /* number of relocations */ + uint16_t num_lin_numbers; /* srsly. */ + uint32_t flags; +}; + +struct win_certificate { + uint32_t length; + uint16_t revision; + uint16_t cert_type; +}; + +/* + * Return -1 if not PE, else offset of the PE header + */ +static int get_pehdr_offset(const char *buf) +{ + int pe_hdr_offset; + + pe_hdr_offset = *((int *)(buf + 0x3c)); + buf += pe_hdr_offset; + if (!!memcmp(buf, "PE\0\0", 4)) { + printf("Not a PE file\n"); + return -1; + } + + return pe_hdr_offset; +} + +#endif diff --git a/tools/kexec/template.c b/tools/kexec/template.c new file mode 100644 index 00000000000000..9f17a4952ecd49 --- /dev/null +++ b/tools/kexec/template.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright (C) 2026 Red Hat, Inc +// +// Original file: kernel/kexec_bpf/template.c +// +#include "vmlinux.h" +#include +#include +#include +#include + +/* Command to kernel kexec bpf loader, which is defined on the stream */ +#define KEXEC_BPF_CMD_DECOMPRESS 0x1 +#define KEXEC_BPF_CMD_COPY 0x2 + +#define KEXEC_BPF_SUBCMD_KERNEL 0x1 +#define KEXEC_BPF_SUBCMD_INITRD 0x2 +#define KEXEC_BPF_SUBCMD_CMDLINE 0x3 + +/* + * The ringbufs can have different capacity. But only four ringbuf are provided. + */ +#ifndef RINGBUF1_SIZE +#define RINGBUF1_SIZE 4 +#endif +#ifndef RINGBUF2_SIZE +#define RINGBUF2_SIZE 4 +#endif +#ifndef RINGBUF3_SIZE +#define RINGBUF3_SIZE 4 +#endif +#ifndef RINGBUF4_SIZE +#define RINGBUF4_SIZE 4 +#endif + +/* ringbuf is safe since the user space has no write access to them */ +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, RINGBUF1_SIZE); +} ringbuf_1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, RINGBUF2_SIZE); +} ringbuf_2 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, RINGBUF3_SIZE); +} ringbuf_3 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, RINGBUF4_SIZE); +} ringbuf_4 SEC(".maps"); + +char LICENSE[] SEC("license") = "GPL"; + +/* + * This function ensures that the sections .rodata, .data, .rodata.str1.1 and .bss + * are created for a bpf prog. + */ +static const char dummy_rodata[16] __attribute__((used)) = "rodata"; +static char dummy_data[16] __attribute__((used)) = "data"; +static char *dummy_mergeable_str __attribute__((used)) = ".rodata.str1.1"; +static char dummy_bss[16] __attribute__((used)); + diff --git a/tools/kexec/zboot_image_builder.c b/tools/kexec/zboot_image_builder.c new file mode 100644 index 00000000000000..c0a785074970e5 --- /dev/null +++ b/tools/kexec/zboot_image_builder.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Red Hat, Inc. + * The zboot format carries the compressed kernel image offset and size + * information in the DOS header. The program appends a bpf section to PE file, + * meanwhile maintains the offset and size information, which is lost when using + * objcopy to handle zboot image. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "pe.h" + +#ifdef DEBUG_DETAIL + #define dprintf(...) printf(__VA_ARGS__) +#else + #define dprintf(...) ((void)0) +#endif + +typedef struct { + union { + struct { + unsigned int mz_magic; + char image_type[4]; + /* offset to the whole file start */ + unsigned int payload_offset; + unsigned int payload_size; + unsigned int reserved[2]; + char comp_type[4]; + }; + char raw_bytes[56]; + }; + unsigned int linux_pe_magic; + /* offset at: 0x3c or 60 */ + unsigned int pe_header_offset; +} __attribute__((packed)) pe_zboot_header; + +typedef unsigned long uintptr_t; +#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1)) + +int main(int argc, char **argv) +{ + uint32_t payload_new_offset, payload_sect_off; + uint32_t payload_size; + uint32_t payload_sect_idx; + pe_zboot_header *zheader; + struct pe_hdr *pe_hdr; + struct pe32plus_opt_hdr *opt_hdr; + int base_fd, out_fd; + char *base_start_addr, *base_cur; + char *out_start_addr, *out_cur; + uint32_t out_sz, max_va_end = 0; + struct stat sb; + int i = 0, ret = 0; + + if (argc != 4) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return -1; + } + + const char *original_pe = argv[1]; + const char *binary_file = argv[2]; + const char *new_pe = argv[3]; + FILE *bin_fp = fopen(binary_file, "rb"); + if (!bin_fp) { + perror("Failed to open binary file"); + return -1; + } + fseek(bin_fp, 0, SEEK_END); + size_t bin_size = ftell(bin_fp); + fseek(bin_fp, 0, SEEK_SET); + base_fd = open(original_pe, O_RDWR); + out_fd = open(new_pe, O_RDWR | O_CREAT, 0644); + if (base_fd == -1 || out_fd == -1) { + perror("Error opening file"); + exit(1); + } + + if (fstat(base_fd, &sb) == -1) { + perror("Error getting file size"); + exit(1); + } + base_start_addr = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, base_fd, 0); + if (base_start_addr == MAP_FAILED) { + perror("Error mmapping the file"); + exit(1); + } + /* 64KB for section table extending */ + out_sz = sb.st_size + bin_size + (1 << 16); + out_start_addr = mmap(NULL, out_sz, PROT_WRITE, MAP_SHARED, out_fd, 0); + if (ftruncate(out_fd, out_sz) == -1) { + perror("Failed to resize output file"); + ret = -1; + goto err; + } + if (out_start_addr == MAP_FAILED) { + perror("Error mmapping the file"); + exit(1); + } + + zheader = (pe_zboot_header *)base_start_addr; + if (zheader->mz_magic != 0x5A4D) { // 'MZ' + fprintf(stderr, "Invalid DOS signature\n"); + return -1; + } + uint32_t pe_hdr_offset = get_pehdr_offset((const char *)base_start_addr); + base_cur = base_start_addr + pe_hdr_offset; + pe_hdr = (struct pe_hdr *)base_cur; + if (pe_hdr->magic!= 0x00004550) { // 'PE\0\0' + fprintf(stderr, "Invalid PE signature\n"); + return -1; + } + base_cur += sizeof(struct pe_hdr); + opt_hdr = (struct pe32plus_opt_hdr *)base_cur; + uint32_t file_align = opt_hdr->file_align; + uint32_t section_alignment = opt_hdr->section_align; + + uint16_t num_sections = pe_hdr->sections; + struct section_header *base_sections, *sect; + uint32_t section_table_offset = pe_hdr_offset + sizeof(struct pe_hdr) + pe_hdr->opt_hdr_size; + base_sections = (struct section_header *)(base_start_addr + section_table_offset); + + /* Decide the section idx and the payload offset within the section */ + for (i = 0; i < num_sections; i++) { + sect = &base_sections[i]; + if (zheader->payload_offset >= sect->data_addr && + zheader->payload_offset < (sect->data_addr + sect->raw_data_size)) { + payload_sect_idx = i; + payload_sect_off = zheader->payload_offset - sect->data_addr; + } + } + + /* Calculate the end of the last section in virtual memory */ + for (i = 0; i < num_sections; i++) { + uint32_t section_end = base_sections[i].virtual_address + base_sections[i].virtual_size; + if (section_end > max_va_end) { + max_va_end = section_end; + } + } + + /* Calculate virtual address for the new .bpf section */ + uint32_t bpf_virtual_address = ALIGN_UP(max_va_end, section_alignment); + + pe_zboot_header *new_zhdr = malloc(sizeof(pe_zboot_header)); + memcpy(new_zhdr, zheader, sizeof(pe_zboot_header)); + struct pe_hdr *new_hdr = malloc(sizeof(struct pe_hdr)); + memcpy(new_hdr, pe_hdr, sizeof(struct pe_hdr)); + new_hdr->sections += 1; + struct pe32plus_opt_hdr *new_opt_hdr = malloc(pe_hdr->opt_hdr_size); + memcpy(new_opt_hdr, opt_hdr, pe_hdr->opt_hdr_size); + /* Create new section headers array (original + new section) */ + struct section_header *new_sections = calloc(1, new_hdr->sections * sizeof(struct section_header)); + if (!new_sections) { + perror("Failed to allocate memory for new section headers"); + return -1; + } + memcpy(new_sections, base_sections, pe_hdr->sections * sizeof(struct section_header)); + + /* Configure the new .bpf section */ + struct section_header *bpf_section = &new_sections[new_hdr->sections - 1]; + memset(bpf_section, 0, sizeof(struct section_header)); + strncpy((char *)bpf_section->name, ".bpf", 8); + bpf_section->virtual_size = bin_size; + bpf_section->virtual_address = bpf_virtual_address; + bpf_section->raw_data_size = bin_size; + bpf_section->flags = 0x40000000; //Readable + + /* Update headers */ + uint32_t new_size_of_image = bpf_section->virtual_address + bpf_section->virtual_size; + new_size_of_image = ALIGN_UP(new_size_of_image, section_alignment); + new_opt_hdr->image_size = new_size_of_image; + + size_t section_table_size = new_hdr->sections * (sizeof(struct section_header)); + size_t headers_size = section_table_offset + section_table_size; + size_t aligned_headers_size = ALIGN_UP(headers_size, file_align); + new_opt_hdr->header_size = aligned_headers_size; + + + uint32_t current_offset = aligned_headers_size; + /* + * If the original PE data_addr is covered by enlarged header_size + * re-assign new data_addr for all sections + */ + if (base_sections[0].data_addr < aligned_headers_size) { + for (i = 0; i < new_hdr->sections; i++) { + new_sections[i].data_addr = current_offset; + current_offset += ALIGN_UP(new_sections[i].raw_data_size, file_align); + } + /* Keep unchanged, just allocating file pointer for bpf section */ + } else { + uint32_t t; + i = new_hdr->sections - 2; + t = new_sections[i].data_addr + new_sections[i].raw_data_size; + i++; + new_sections[i].data_addr = ALIGN_UP(t, file_align); + } + + payload_new_offset = new_sections[payload_sect_idx].data_addr + payload_sect_off; + /* Update */ + new_zhdr->payload_offset = payload_new_offset; + new_zhdr->payload_size = zheader->payload_size; + dprintf("zboot payload_offset updated from 0x%x to 0x%x, size:0x%x\n", + zheader->payload_offset, payload_new_offset, new_zhdr->payload_size); + + + /* compose the new PE file */ + + /* Write Dos header */ + memcpy(out_start_addr, new_zhdr, sizeof(pe_zboot_header)); + out_cur = out_start_addr + pe_hdr_offset; + + /* Write PE header */ + memcpy(out_cur, new_hdr, sizeof(struct pe_hdr)); + out_cur += sizeof(struct pe_hdr); + + /* Write PE optional header */ + memcpy(out_cur, new_opt_hdr, new_hdr->opt_hdr_size); + out_cur += new_hdr->opt_hdr_size; + + /* Write all section headers */ + memcpy(out_cur, new_sections, new_hdr->sections * sizeof(struct section_header)); + + /* Skip padding and copy the section data */ + for (i = 0; i < pe_hdr->sections; i++) { + base_cur = base_start_addr + base_sections[i].data_addr; + out_cur = out_start_addr + new_sections[i].data_addr; + memcpy(out_cur, base_cur, base_sections[i].raw_data_size); + } + msync(out_start_addr, new_sections[i].data_addr + new_sections[i].raw_data_size, MS_ASYNC); + /* For the bpf section */ + out_cur = out_start_addr + new_sections[i].data_addr; + + /* Write .bpf section data */ + char *bin_data = calloc(1, bin_size); + if (!bin_data) { + perror("Failed to allocate memory for binary data"); + free(new_sections); + ret = -1; + goto err; + } + if (fread(bin_data, bin_size, 1, bin_fp) != 1) { + perror("Failed to read binary data"); + free(bin_data); + free(new_sections); + ret = -1; + goto err; + } + + if (out_cur + bin_size > out_start_addr + out_sz) { + perror("out of out_fd mmap\n"); + ret = -1; + goto err; + } + memcpy(out_cur, bin_data, bin_size); + /* calculate the real size */ + out_sz = out_cur + bin_size - out_start_addr; + msync(out_start_addr, out_sz, MS_ASYNC); + /* truncate to the real size */ + if (ftruncate(out_fd, out_sz) == -1) { + perror("Failed to resize output file"); + ret = -1; + goto err; + } + printf("Create a new PE file with bpf section: %s\n", new_pe); +err: + munmap(out_start_addr, out_sz); + munmap(base_start_addr, sb.st_size); + close(base_fd); + close(out_fd); + fclose(bin_fp); + + return ret; +} diff --git a/tools/kexec/zboot_parser_bpf.c b/tools/kexec/zboot_parser_bpf.c new file mode 100644 index 00000000000000..54c4b762b33242 --- /dev/null +++ b/tools/kexec/zboot_parser_bpf.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright (C) 2025, 2026 Red Hat, Inc +// +#include "vmlinux.h" +#include +#include +#include "image_size.h" + +/* ringbuf 2,3,4 are useless */ +#define MIN_BUF_SIZE 1 +#define MAX_RECORD_SIZE (IMAGE_SIZE + 40960) +#define RINGBUF1_SIZE IMAGE_SIZE_POWER2_ALIGN +#define RINGBUF2_SIZE MIN_BUF_SIZE +#define RINGBUF3_SIZE MIN_BUF_SIZE +#define RINGBUF4_SIZE MIN_BUF_SIZE + + +#include "template.c" + +/* see drivers/firmware/efi/libstub/zboot-header.S */ +struct linux_pe_zboot_header { + unsigned int mz_magic; + char image_type[4]; + unsigned int payload_offset; + unsigned int payload_size; + unsigned int reserved[2]; + char comp_type[4]; + unsigned int linux_pe_magic; + unsigned int pe_header_offset; +} __attribute__((packed)); + + +SEC("fentry.s/kexec_image_parser_anchor") +int BPF_PROG(parse_pe, struct kexec_context *context, unsigned long parser_id) +{ + struct linux_pe_zboot_header *zboot_header; + unsigned int image_sz; + char *buf; + int ret = 0; + + image_sz = context->kernel_sz; + bpf_printk("begin parse PE\n"); + /* BPF verifier should know each variable initial state */ + if (!context->kernel || (image_sz > MAX_RECORD_SIZE)) { + bpf_printk("Err: image size is greater than 0x%lx\n", MAX_RECORD_SIZE); + return 0; + } + + /* In order to access bytes not aligned on 2 order, copy into ringbuf. + * And allocate the memory all at once, later overwriting. + * + * R2 is ARG_CONST_ALLOC_SIZE_OR_ZERO, should be decided at compling time + */ + buf = (char *)bpf_ringbuf_reserve(&ringbuf_1, MAX_RECORD_SIZE, 0); + if (!buf) { + bpf_printk("Err: fail to reserve ringbuf to parse zboot header\n"); + return 0; + } + bpf_probe_read((void *)buf, sizeof(struct linux_pe_zboot_header), context->kernel); + zboot_header = (struct linux_pe_zboot_header *)buf; + if (!!__builtin_memcmp(&zboot_header->image_type, "zimg", + sizeof(zboot_header->image_type))) { + bpf_ringbuf_discard(buf, BPF_RB_NO_WAKEUP); + bpf_printk("Err: image is not zboot image\n"); + return 0; + } + + unsigned int payload_offset = zboot_header->payload_offset; + unsigned int payload_size = zboot_header->payload_size; + bpf_printk("zboot image payload offset=0x%x, size=0x%x\n", payload_offset, payload_size); + /* sane check */ + if (payload_size > image_sz) { + bpf_ringbuf_discard(buf, BPF_RB_NO_WAKEUP); + bpf_printk("Invalid zboot image payload offset and size\n"); + return 0; + } + unsigned int max_payload = MAX_RECORD_SIZE - sizeof(struct cmd_hdr); + if (payload_size >= max_payload) { + bpf_ringbuf_discard(buf, BPF_RB_NO_WAKEUP); + bpf_printk("Err: payload_size > MAX_RECORD_SIZE\n"); + return 0; + } + void *dst = (void *)buf + sizeof(struct cmd_hdr); + /* Overwrite buf */ + struct cmd_hdr *cmd = (struct cmd_hdr *)buf; + cmd->cmd = KEXEC_BPF_CMD_DECOMPRESS; + cmd->subcmd = KEXEC_BPF_SUBCMD_KERNEL; + /* 4 bytes original size is appended after vmlinuz.bin */ + cmd->payload_len = payload_size - 4; + bpf_probe_read(dst, payload_size, context->kernel + payload_offset); + if (payload_size < 4) { + bpf_ringbuf_discard(buf, BPF_RB_NO_WAKEUP); + return 0; + } + bpf_printk("Calling bpf_kexec_decompress()\n"); + struct bpf_parser_context *bpf = bpf_get_parser_context(parser_id); + if (!bpf) { + bpf_ringbuf_discard(buf, BPF_RB_NO_WAKEUP); + bpf_printk("No parser in kernel\n"); + return 0; + } + ret = bpf_buffer_parser(buf, sizeof(struct cmd_hdr) + payload_size - 4, bpf); + if (ret < 0) { + bpf_ringbuf_discard(buf, BPF_RB_NO_WAKEUP); + bpf_put_parser_context(bpf); + bpf_printk("Decompression fails\n"); + return 0; + } + bpf_ringbuf_discard(buf, BPF_RB_NO_WAKEUP); + bpf_put_parser_context(bpf); + + return 0; +}