diff --git a/Makefile b/Makefile index e55d68b..9402b40 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,23 @@ .PHONY: all all: cd src && $(MAKE) cd lib && $(MAKE) + cd test && $(MAKE) + +# Kept below `all` so that a bare `make` only builds; this target needs sudo. +.PHONY: test +test: all + sudo rmmod afl_snapshot || echo "Not loaded anyways..." + sudo insmod src/afl_snapshot.ko + ./test/test3 +# test/Makefile defines no clean target, so remove its only output directly. clean: cd src && $(MAKE) clean cd lib && $(MAKE) clean + cd test && $(RM) test3 code-format: ./.custom-format.py -i src/*.c diff --git a/README.md b/README.md index 1fe921e..d32d4ad 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,9 @@ however adding this snapshot module will still be a small improvement. |tiff|thumbnail|5058|3114|x1.6| |libxml|xmllint|7835|3450|x2.3| |afl++|test_persistent_new|106k|89k|x1.2| +|afl++|emmu_fuzz|10k-20k|40|x250-x500| + +**TODO:** Rerun the others with improved version? ## Usage diff --git a/load.sh b/load.sh index 10787ba..13a8ed5 100755 --- a/load.sh +++ b/load.sh @@ -1,5 +1,8 @@ #!/bin/sh +# Plain `set -e`: `pipefail` is not understood by every /bin/sh (e.g. older dash). +set -e + if [ '!' "$EUID" = 0 ] && [ '!' `id -u` = 0 ] ; then echo "Warning: you need to be root to run this!" # we do not exit as other mechanisms exist that allows to do this than @@ -8,6 +11,6 @@ fi cd src/ -rmmod afl_snapshot +rmmod afl_snapshot || echo "Not loaded anyways..." 
make insmod afl_snapshot.ko && echo Successfully loaded the snapshot module diff --git a/src/Makefile b/src/Makefile index 739e698..65009b8 100644 --- a/src/Makefile +++ b/src/Makefile @@ -46,10 +46,9 @@ endif LINUX_DIR ?= /lib/modules/$(shell uname -r)/build .PHONY: all +# env ARCH='$(ARCH)' LINUX_SYSTEM_MAP='$(LINUX_SYSTEM_MAP)' python3 lookup_symbols.py all: - env ARCH='$(ARCH)' LINUX_SYSTEM_MAP='$(LINUX_SYSTEM_MAP)' python3 lookup_symbols.py - $(MAKE) -C '$(LINUX_DIR)' M='$(M)' modules clean: diff --git a/src/debug.h b/src/debug.h index 2e3de44..0a534aa 100644 --- a/src/debug.h +++ b/src/debug.h @@ -54,5 +54,7 @@ #endif +#define PREEMPT_DEBUG(tag) SAYF("[%s():%s:%d] " tag " preempt_count() == %d\n", __FUNCTION__, __FILE__, __LINE__, preempt_count()) + #endif diff --git a/src/files.c b/src/files.c index bf80cb1..84d5bf2 100644 --- a/src/files.c +++ b/src/files.c @@ -57,7 +57,8 @@ void recover_files_snapshot(struct task_data *data) { DBG_PRINT("find new fds %d file* 0x%08lx\n", i, (unsigned long)file); // fdt->fd[i] = NULL; // filp_close(file, files); - __close_fd(files, i); + WARNF("closing doesn't work :(\n"); + // __close_fd(files, i); } diff --git a/src/ftrace_helper.h b/src/ftrace_helper.h new file mode 100644 index 0000000..299f566 --- /dev/null +++ b/src/ftrace_helper.h @@ -0,0 +1,219 @@ +/* + * Helper library for ftrace hooking kernel functions + * Author: Harvey Phillips (xcellerator@gmx.com) + * License: GPL + * */ + +#include +#include +#include +#include +#include +#include "debug.h" +#include "ftrace_util.h" + +#if defined(CONFIG_X86_64) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0)) +#define PTREGS_SYSCALL_STUBS 1 +#endif + +/* + * On Linux kernels 5.7+, kallsyms_lookup_name() is no longer exported, + * so we have to use kprobes to get the address. + * Full credit to @f0lg0 for the idea. 
+ */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,7,0) +#define KPROBE_LOOKUP 1 +#include +static struct kprobe kp = { + .symbol_name = "kallsyms_lookup_name" +}; +typedef unsigned long (*kallsyms_lookup_name_t)(const char *name); +kallsyms_lookup_name_t kallsyms_lookup_name_var; +#define kallsyms_lookup_name kallsyms_lookup_name_var +#endif + +/* x64 has to be special and require a different naming convention */ +#ifdef PTREGS_SYSCALL_STUBS +#define SYSCALL_NAME(name) ("__x64_" name) +#else +#define SYSCALL_NAME(name) (name) +#endif + +#define HOOK(_name, _hook, _orig) \ +{ \ + .name = (_name), \ + .function = (_hook), \ + .original = (_orig), \ +} + +#define SYSCALL_HOOK(_name, _hook, _orig) \ +{ \ + .name = SYSCALL_NAME(_name), \ + .function = (_hook), \ + .original = (_orig), \ +} + + +/* We need to prevent recursive loops when hooking, otherwise the kernel will + * panic and hang. The options are to either detect recursion by looking at + * the function return address, or by jumping over the ftrace call. We use the + * first option, by setting USE_FENTRY_OFFSET = 0, but could use the other by + * setting it to 1. (Oridinarily ftrace provides it's own protections against + * recursion, but it relies on saving return registers in $rip. We will likely + * need the use of the $rip register in our hook, so we have to disable this + * protection and implement our own). + * */ +#define USE_FENTRY_OFFSET 0 +#if !USE_FENTRY_OFFSET +#pragma GCC optimize("-fno-optimize-sibling-calls") +#endif + +/* We pack all the information we need (name, hooking function, original function) + * into this struct. This makes is easier for setting up the hook and just passing + * the entire struct off to fh_install_hook() later on. + * */ +struct ftrace_hook { + const char *name; + void *function; + void *original; + + unsigned long address; + struct ftrace_ops ops; +}; + +/* Ftrace needs to know the address of the original function that we + * are going to hook. 
As before, we just use kallsyms_lookup_name() + * to find the address in kernel memory. Returns 0, or -ENOENT on lookup failure. + * */ +static int fh_resolve_hook_address(struct ftrace_hook *hook) +{ +#ifdef KPROBE_LOOKUP + if (register_kprobe(&kp) < 0) return -ENOENT; /* without a registered probe kp.addr is not a usable address */ + kallsyms_lookup_name = (kallsyms_lookup_name_t) kp.addr; + unregister_kprobe(&kp); +#endif + hook->address = kallsyms_lookup_name(hook->name); + + if (!hook->address) + { + printk(KERN_DEBUG "rootkit: unresolved symbol: %s\n", hook->name); + return -ENOENT; + } + +#if USE_FENTRY_OFFSET + *((unsigned long*) hook->original) = hook->address + MCOUNT_INSN_SIZE; +#else + *((unsigned long*) hook->original) = hook->address; +#endif + + return 0; +} + +/* See comment below within fh_install_hook() */ +static void notrace fh_ftrace_thunk(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, ftrace_regs_ptr regs) +{ + struct ftrace_hook *hook = container_of(ops, struct ftrace_hook, ops); + struct pt_regs* pregs = ftrace_get_regs(regs); + +#if USE_FENTRY_OFFSET + pregs->ip = (unsigned long) hook->function; +#else + if(!within_module(parent_ip, THIS_MODULE)) + pregs->ip = (unsigned long) hook->function; +#endif +} + +/* Assuming we've already set hook->name, hook->function and hook->original, we + * can go ahead and install the hook with ftrace. This is done by setting the + * ops field of hook (see the comment below for more details), and then using + * the built-in ftrace_set_filter_ip() and register_ftrace_function() functions + * provided by ftrace.h + * */ +int fh_install_hook(struct ftrace_hook *hook) +{ + int err; + err = fh_resolve_hook_address(hook); + if(err) + return err; + SAYF("Successfully resolved address 0x%lx for function %s\n", hook->address, hook->name); + + /* For many of function hooks (especially non-trivial ones), the $rip + * register gets modified, so we have to alert ftrace to this fact. This + * is the reason for the SAVE_REGS and IP_MODIFY flags. 
However, we also + * need to OR the RECURSION_SAFE flag (effectively turning if OFF) because + * the built-in anti-recursion guard provided by ftrace is useless if + * we're modifying $rip. This is why we have to implement our own checks + * (see USE_FENTRY_OFFSET). */ + hook->ops.func = fh_ftrace_thunk; + hook->ops.flags = FTRACE_OPS_FL_SAVE_REGS + | FTRACE_OPS_FL_RECURSION_SAFE + | FTRACE_OPS_FL_IPMODIFY; + + err = ftrace_set_filter_ip(&hook->ops, hook->address, 0, 0); + if(err) + { + printk(KERN_DEBUG "rootkit: ftrace_set_filter_ip() failed: %d\n", err); + return err; + } + + err = register_ftrace_function(&hook->ops); + if(err) + { + printk(KERN_DEBUG "rootkit: register_ftrace_function() failed: %d\n", err); + return err; + } + + return 0; +} + +/* Disabling our function hook is just a simple matter of calling the built-in + * unregister_ftrace_function() and ftrace_set_filter_ip() functions (note the + * opposite order to that in fh_install_hook()). + * */ +void fh_remove_hook(struct ftrace_hook *hook) +{ + int err; + err = unregister_ftrace_function(&hook->ops); + if(err) + { + printk(KERN_DEBUG "rootkit: unregister_ftrace_function() failed: %d\n", err); + } + + err = ftrace_set_filter_ip(&hook->ops, hook->address, 1, 0); + if(err) + { + printk(KERN_DEBUG "rootkit: ftrace_set_filter_ip() failed: %d\n", err); + } +} + +/* To make it easier to hook multiple functions in one module, this provides + * a simple loop over an array of ftrace_hook struct + * */ +int fh_install_hooks(struct ftrace_hook *hooks, size_t count) +{ + int err; + size_t i; + + for (i = 0 ; i < count ; i++) + { + err = fh_install_hook(&hooks[i]); + if(err) + goto error; + } + return 0; + +error: + while (i != 0) + { + fh_remove_hook(&hooks[--i]); + } + return err; +} + +void fh_remove_hooks(struct ftrace_hook *hooks, size_t count) +{ + size_t i; + + for (i = 0 ; i < count ; i++) + fh_remove_hook(&hooks[i]); +} \ No newline at end of file diff --git a/src/ftrace_util.h b/src/ftrace_util.h new 
file mode 100644 index 0000000..5969812 --- /dev/null +++ b/src/ftrace_util.h @@ -0,0 +1,18 @@ +#ifndef __FTRACE_UTIL_H +#define __FTRACE_UTIL_H + +#include +#include + +// In 5.11+, ftrace hooks take ftrace_regs as argument. +// Hacky way to fix this for older kernels. +#if LINUX_VERSION_CODE < KERNEL_VERSION(5,11,0) +typedef struct pt_regs* ftrace_regs_ptr; +#define ftrace_get_regs(reg_ptr) (reg_ptr) +#define FTRACE_OPS_FL_RECURSION 0 +#else +typedef struct ftrace_regs* ftrace_regs_ptr; +#define FTRACE_OPS_FL_RECURSION_SAFE 0 +#endif + +#endif /* __FTRACE_UTIL_H */ diff --git a/src/hook.c b/src/hook.c index 9b39f79..f0946ef 100644 --- a/src/hook.c +++ b/src/hook.c @@ -1,13 +1,17 @@ #include #include +#include #include #include - +#include +#include "debug.h" +#include "ftrace_util.h" // TODO(andrea) switch from Kprobes to Ftrace struct hook { struct kprobe kp; + struct ftrace_ops fops; struct list_head l; }; @@ -15,13 +19,24 @@ struct hook { LIST_HEAD(hooks); +/* Hook func_name through ftrace. Returns true on success, false if allocation, filter setup or registration fails. */ int try_hook(const char *func_name, void *handler) { - + SAYF("Hooking function %s\n", func_name); struct hook *hook = kmalloc(sizeof(struct hook), GFP_KERNEL | __GFP_ZERO); + if (!hook) return false; INIT_LIST_HEAD(&hook->l); hook->kp.symbol_name = func_name; hook->kp.pre_handler = handler; - - int ret = register_kprobe(&hook->kp); + hook->fops.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_RECURSION; + hook->fops.func = handler; + int ret = ftrace_set_filter(&hook->fops, (unsigned char *)func_name, strlen(func_name), 0); + if (!ret) ret = register_ftrace_function(&hook->fops); + SAYF("Hooked function: %d\n", ret); + // int ret = register_kprobe(&hook->kp); - if (!ret) { list_add(&hook->l, &hooks); } + if (ret) { + /* never registered, so it must not reach the hooks list; free it instead of leaking */ + kfree(hook); + return false; + } + list_add(&hook->l, &hooks); return true; @@ -35,7 +50,8 @@ void unhook(const char *func_name) { if (!strcmp(hook->kp.symbol_name, func_name)) { - unregister_kprobe(&hook->kp); + // unregister_kprobe(&hook->kp); + unregister_ftrace_function(&hook->fops); } @@ -48,7 +64,8 @@ void unhook_all(void) { struct hook *hook = NULL; list_for_each_entry(hook, &hooks, l) { - 
unregister_kprobe(&hook->kp); + // unregister_kprobe(&hook->kp); + unregister_ftrace_function(&hook->fops); } diff --git a/src/lookup_symbols.py b/src/lookup_symbols.py index f7e8da9..14c41a4 100644 --- a/src/lookup_symbols.py +++ b/src/lookup_symbols.py @@ -13,6 +13,8 @@ system_map = map(lambda x: x.split(), fd.read().split('\n')) +# print("system_map:", list(system_map)) + register_chrdev_region = None sys_call_table = None sys_read = None diff --git a/src/memory.c b/src/memory.c index 24b381d..676ffb2 100644 --- a/src/memory.c +++ b/src/memory.c @@ -292,6 +292,8 @@ void make_snapshot_page(struct task_data *data, struct mm_struct *mm, } +// TODO: This seems broken? +// If I have a page that is right below the page of the stack, then it will count as a stack page. inline bool is_stack(struct vm_area_struct *vma) { return vma->vm_start <= vma->vm_mm->start_stack && @@ -305,10 +307,13 @@ void take_memory_snapshot(struct task_data *data) { unsigned long addr; get_cpu_var(last_task) = NULL; - put_cpu_var(last_task); get_cpu_var(last_data) = NULL; + put_cpu_var(last_task); put_cpu_var(last_data); + // Only do loops if DBG_PRINT actually does something. + // Not sure if compiler would be smart enough to eliminate these anyways. + #if DEBUG struct vmrange_node *n = data->allowlist; while (n) { @@ -324,6 +329,7 @@ void take_memory_snapshot(struct task_data *data) { n = n->next; } + #endif do { @@ -332,8 +338,9 @@ void take_memory_snapshot(struct task_data *data) { add_snapshot_vma(data, pvma->vm_start, pvma->vm_end); // We only care about writable pages. 
Shared memory pages are skipped - // if notsack is specified, skip if this this the stack + // if nostack is specified, skip if this this the stack // Otherwise, look into the allowlist + // SAYF("Considering: 0x%016lx - 0x%016lx (stack: %d)", pvma->vm_start, pvma->vm_end, is_stack(pvma)); if (((pvma->vm_flags & VM_WRITE) && !(pvma->vm_flags & VM_SHARED) && !((data->config & AFL_SNAPSHOT_NOSTACK) && is_stack(pvma))) || intersect_allowlist(pvma->vm_start, pvma->vm_end)) { @@ -358,7 +365,6 @@ void take_memory_snapshot(struct task_data *data) { pvma = pvma->vm_next; } while (pvma != NULL); - } void munmap_new_vmas(struct task_data *data) { @@ -458,7 +464,6 @@ void do_recover_page(struct snapshot_page *sp) { "0x%08lx\n", (unsigned long)sp->page_data, (unsigned long)sp->page_base, sp->page_prot); - if (copy_to_user((void __user *)sp->page_base, sp->page_data, PAGE_SIZE) != 0) DBG_PRINT("incomplete copy_to_user\n"); sp->dirty = false; @@ -483,10 +488,20 @@ void recover_memory_snapshot(struct task_data *data) { pte_t * pte, entry; int i; - if (data->config & AFL_SNAPSHOT_MMAP) munmap_new_vmas(data); + int count = 0; + if (data->config & AFL_SNAPSHOT_MMAP) munmap_new_vmas(data); + // Instead of iterating over all pages in the snapshot and then restoring the dirty ones, + // we can save a lot of computing time by keeping a list of only dirty pages. + // Since we know exactly when pages match the conditions below, we can just insert them into the dirty list then. + // This had a massive boost on performance for me, >50%. (Might be more or less depending on a few factors). 
+ // + // original loop below hash_for_each(data->ss.ss_page, i, sp, next) { - + struct list_head* ptr; + // for (ptr = data->ss.dirty_pages.next; ptr != &data->ss.dirty_pages; ptr = ptr->next){ + count++; + // sp = list_entry(ptr, struct snapshot_page, dirty_list); if (sp->dirty && sp->has_been_copied) { // it has been captured by page fault @@ -523,9 +538,21 @@ void recover_memory_snapshot(struct task_data *data) { sp->has_had_pte = false; } - + if (!sp->in_dirty_list) { + // WARNF("0x%016lx: sp->in_dirty_list = false, but we just encountered it in dirty list!?", sp->page_base); + } + sp->in_dirty_list = false; + // if (ptr->next == ptr || ptr->prev == ptr) { + // WARNF("0x%016lx: DETECTED CYCLE IN DIRTY LIST: ptr: %px, ptr->next: %px", sp->page_base, &ptr, ptr->next); + // break; + // } } + DBG_PRINT("HAD %d dirty pages!", count); + + // haha this is really dumb + // surely this will not come back to bite me later, right?? + INIT_LIST_HEAD(&data->ss.dirty_pages); } void clean_snapshot_vmas(struct task_data *data) { @@ -551,24 +578,38 @@ void clean_memory_snapshot(struct task_data *data) { struct snapshot_page *sp; int i; - if (get_cpu_var(last_task) == current) { + struct task_struct* ltask = get_cpu_var(last_task); + if (ltask == current) { get_cpu_var(last_task) = NULL; get_cpu_var(last_data) = NULL; - + put_cpu_var(last_task); + put_cpu_var(last_data); } - put_cpu_var(last_task); - put_cpu_var(last_data); + /* pairs with the get_cpu_var(last_task) read into ltask, which happens on both paths */ + put_cpu_var(last_task); if (data->config & AFL_SNAPSHOT_MMAP) clean_snapshot_vmas(data); + // we need to always be a single item behind, otherwise we have a use after free! 
+ struct snapshot_page *prev_sp = NULL; + hash_for_each(data->ss.ss_page, i, sp, next) { + if (prev_sp != NULL) { + hash_del(&prev_sp->next); + kfree(prev_sp); + prev_sp = NULL; + } if (sp->page_data != NULL) kfree(sp->page_data); + prev_sp = sp; + } - kfree(sp); - + if (prev_sp != NULL) { + hash_del(&prev_sp->next); + kfree(prev_sp); + prev_sp = NULL; } } @@ -579,7 +618,8 @@ static long return_0_stub_func(void) { } -int wp_page_hook(struct kprobe *p, struct pt_regs *regs) { +int wp_page_hook(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, ftrace_regs_ptr regs) { struct vm_fault * vmf; struct mm_struct * mm; @@ -589,27 +629,31 @@ int wp_page_hook(struct kprobe *p, struct pt_regs *regs) { pte_t entry; char * vfrom; - vmf = (struct vm_fault *)regs->di; + struct pt_regs* pregs = ftrace_get_regs(regs); + + vmf = (struct vm_fault *)pregs->di; mm = vmf->vma->vm_mm; ss_page = NULL; - if (get_cpu_var(last_task) == mm->owner) { + struct task_struct* ltask = get_cpu_var(last_task); + if (ltask == mm->owner) { // fast path data = get_cpu_var(last_data); - + put_cpu_var(last_task); + put_cpu_var(last_data); } else { // query the radix tree data = get_task_data(mm->owner); get_cpu_var(last_task) = mm->owner; get_cpu_var(last_data) = data; + put_cpu_var(last_task); + put_cpu_var(last_task); + put_cpu_var(last_data); } - put_cpu_var(last_task); - put_cpu_var(last_data); // not needed? - if (data && have_snapshot(data)) { ss_page = get_snapshot_page(data, vmf->address & PAGE_MASK); @@ -628,9 +672,15 @@ int wp_page_hook(struct kprobe *p, struct pt_regs *regs) { if (ss_page->dirty) return 0; ss_page->dirty = true; + if (ss_page->in_dirty_list) { + WARNF("0x%016lx: Adding page to dirty list, but it's already there??? 
(dirty: %d, copied: %d)", ss_page->page_base, ss_page->dirty, ss_page->has_been_copied); + } else { + ss_page->in_dirty_list = true; + list_add_tail(&ss_page->dirty_list, &data->ss.dirty_pages); + } DBG_PRINT("wp_page_hook 0x%08lx", vmf->address); - + // dump_stack(); /* the page has been copied? * the page becomes COW page again. we do not need to take care of it. */ @@ -675,7 +725,7 @@ int wp_page_hook(struct kprobe *p, struct pt_regs *regs) { pte_unmap_unlock(vmf->pte, vmf->ptl); // skip original function - regs->ip = (long unsigned int)&return_0_stub_func; + pregs->ip = (long unsigned int)&return_0_stub_func; return 1; } @@ -686,7 +736,8 @@ int wp_page_hook(struct kprobe *p, struct pt_regs *regs) { // actually hooking page_add_new_anon_rmap, but we really only care about calls // from do_anonymous_page -int do_anonymous_hook(struct kprobe *p, struct pt_regs *regs) { +int do_anonymous_hook(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, ftrace_regs_ptr regs) { struct vm_area_struct *vma; struct mm_struct * mm; @@ -694,28 +745,32 @@ int do_anonymous_hook(struct kprobe *p, struct pt_regs *regs) { struct snapshot_page * ss_page; unsigned long address; - vma = (struct vm_area_struct *)regs->si; - address = regs->dx; + struct pt_regs* pregs = ftrace_get_regs(regs); + + vma = (struct vm_area_struct *)pregs->si; + address = pregs->dx; mm = vma->vm_mm; ss_page = NULL; - if (get_cpu_var(last_task) == mm->owner) { + struct task_struct* ltask = get_cpu_var(last_task); + if (ltask == mm->owner) { // fast path data = get_cpu_var(last_data); - + put_cpu_var(last_task); + put_cpu_var(last_data); } else { // query the radix tree data = get_task_data(mm->owner); get_cpu_var(last_task) = mm->owner; get_cpu_var(last_data) = data; + put_cpu_var(last_task); + put_cpu_var(last_task); + put_cpu_var(last_data); } - put_cpu_var(last_task); - put_cpu_var(last_data); // not needed? 
- if (data && have_snapshot(data)) { ss_page = get_snapshot_page(data, address & PAGE_MASK); @@ -734,11 +789,90 @@ int do_anonymous_hook(struct kprobe *p, struct pt_regs *regs) { } DBG_PRINT("do_anonymous_page 0x%08lx", address); + // dump_stack(); // HAVE PTE NOW ss_page->has_had_pte = true; + if (is_snapshot_page_none_pte(ss_page)) { + if (ss_page->in_dirty_list) { + WARNF("0x%016lx: Adding page to dirty list, but it's already there??? (dirty: %d, copied: %d)", ss_page->page_base, ss_page->dirty, ss_page->has_been_copied); + } else { + ss_page->in_dirty_list = true; + list_add_tail(&ss_page->dirty_list, &data->ss.dirty_pages); + } + } return 0; } +// ftrace callback reading a struct vm_fault * from pregs->di: marks the faulting snapshot page as +// having a PTE and, when is_snapshot_page_none_pte() holds, queues it on the dirty list. +void finish_fault_hook(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, ftrace_regs_ptr regs) +{ + struct pt_regs* pregs = ftrace_get_regs(regs); + struct vm_fault *vmf = (struct vm_fault*)pregs->di; + struct vm_area_struct *vma; + struct mm_struct * mm; + struct task_data * data; + struct snapshot_page * ss_page; + unsigned long address; + + vma = vmf->vma; + mm = vma->vm_mm; /* needed below: mm->owner selects the task_data */ + address = vmf->address; + + struct task_struct* ltask = get_cpu_var(last_task); + if (ltask == mm->owner) { + + // fast path + data = get_cpu_var(last_data); + put_cpu_var(last_task); + put_cpu_var(last_data); + } else { + + // query the radix tree + data = get_task_data(mm->owner); + get_cpu_var(last_task) = mm->owner; + get_cpu_var(last_data) = data; + // last_task was taken twice (compare + store), so it is released twice + put_cpu_var(last_task); + put_cpu_var(last_task); + put_cpu_var(last_data); + + } + + if (data && have_snapshot(data)) { + + ss_page = get_snapshot_page(data, address & PAGE_MASK); + + } else { + + return; + + } + + if (!ss_page) { + + /* not a snapshot'ed page */ + return; + + } + + DBG_PRINT("finish_fault 0x%08lx", address); + // dump_stack(); + + // HAVE PTE NOW + ss_page->has_had_pte = true; + if (is_snapshot_page_none_pte(ss_page)) { + if (ss_page->in_dirty_list) { + WARNF("0x%016lx: Adding page to dirty list, but it's already there???", ss_page->page_base); + } else { + ss_page->in_dirty_list 
= true; + list_add_tail(&ss_page->dirty_list, &data->ss.dirty_pages); + } + } + + return; +} diff --git a/src/module.c b/src/module.c index e3303b6..f0b0c74 100644 --- a/src/module.c +++ b/src/module.c @@ -17,7 +17,8 @@ #include "hook.h" // function hooking #include "snapshot.h" // main implementation #include "debug.h" -#include "symbols.h" +// #include "symbols.h" +#include "ftrace_helper.h" #include "afl_snapshot.h" @@ -129,7 +130,7 @@ static struct file_operations dev_fops = { }; -#ifdef ARCH_HAS_SYSCALL_WRAPPER +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER typedef int (*syscall_handler_t)(struct pt_regs *); // The original syscall handler that we removed to override exit_group() @@ -144,6 +145,16 @@ syscall_handler_t orig_sct_exit_group = NULL; asmlinkage int sys_exit_group(struct pt_regs *regs) { + // SAYF("hooked sys_exit_group(%p)\n", regs); + // enum show_regs_mode print_kernel_regs; + + // show_regs_print_info(LOGLEVEL_INFO); + + // print_kernel_regs = user_mode(regs) ? SHOW_REGS_USER : SHOW_REGS_ALL; + // __show_regs(regs, print_kernel_regs, LOGLEVEL_INFO); + // int ret = exit_snapshot(); + // SAYF("exit_snapshot() = %d\n", ret); + // return orig_sct_exit_group(regs); if (exit_snapshot()) return orig_sct_exit_group(regs); return 0; @@ -163,84 +174,20 @@ asmlinkage long sys_exit_group(int error_code) { } #endif + +static struct ftrace_hook syscall_hooks[] = { + SYSCALL_HOOK("sys_exit_group", sys_exit_group, &orig_sct_exit_group), +}; + #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,8,0) /* rename since Linux 5.8 */ #define probe_kernel_read copy_from_kernel_nofault #endif -static void **get_syscall_table(void) { - - void **syscall_table = NULL; - - syscall_table = (void**)SYMADDR_sys_call_table; - - if (syscall_table) { return syscall_table; } - - int i; - unsigned long long s0 = SYMADDR_sys_read; - unsigned long long s1 = SYMADDR_sys_read; - - unsigned long long *data = - (unsigned long long *)(SYMADDR__etext & ~0x7); - for (i = 0; (unsigned long long)(&data[i]) < 
ULLONG_MAX; i++) { - - unsigned long long d; - // use probe_kernel_read so we don't fault - if (probe_kernel_read(&d, &data[i], sizeof(d))) { continue; } - - if (d == s0 && data[i + 1] == s1) { - - syscall_table = (void **)(&data[i]); - break; - - } - - } - - return syscall_table; - -} - -static void _write_cr0(unsigned long val) { - - asm volatile("mov %0,%%cr0" : "+r"(val)); - -} - -static void enable_write_protection(void) { - - _write_cr0(read_cr0() | (1 << 16)); - -} - -static void disable_write_protection(void) { - - _write_cr0(read_cr0() & (~(1 << 16))); - -} - -static void **syscall_table_ptr; - -static void patch_syscall_table(void) { - - disable_write_protection(); - orig_sct_exit_group = syscall_table_ptr[__NR_exit_group]; - syscall_table_ptr[__NR_exit_group] = &sys_exit_group; - enable_write_protection(); - -} - -static void unpatch_syscall_table(void) { - - disable_write_protection(); - syscall_table_ptr[__NR_exit_group] = orig_sct_exit_group; - enable_write_protection(); - -} - +// TODO(galli-leo): we should be able to just use kallsyms_lookup_name now. 
int snapshot_initialize_k_funcs() { - k_flush_tlb_mm_range = (void *)SYMADDR_flush_tlb_mm_range; - k_zap_page_range = (void *)SYMADDR_zap_page_range; + k_flush_tlb_mm_range = (void *)kallsyms_lookup_name("flush_tlb_mm_range"); + k_zap_page_range = (void *)kallsyms_lookup_name("zap_page_range"); if (!k_flush_tlb_mm_range || !k_zap_page_range) { return -ENOENT; } @@ -250,6 +197,9 @@ int snapshot_initialize_k_funcs() { } +void finish_fault_hook(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, ftrace_regs_ptr regs); + static int __init mod_init(void) { SAYF("Loading AFL++ snapshot LKM"); @@ -291,22 +241,16 @@ static int __init mod_init(void) { SAYF("The major device number is %d", mod_major_num); - // syscall_table overwrites - syscall_table_ptr = get_syscall_table(); - if (!syscall_table_ptr) { - - FATAL("Unable to locate syscall_table"); - return -ENOENT; - - } - - patch_syscall_table(); + int err; + err = fh_install_hooks(syscall_hooks, ARRAY_SIZE(syscall_hooks)); + if(err) + return err; // func hooks if (!try_hook("do_wp_page", &wp_page_hook)) { FATAL("Unable to hook do_wp_page"); - unpatch_syscall_table(); + fh_remove_hooks(syscall_hooks, ARRAY_SIZE(syscall_hooks)); /* undo the exit_group hook installed above before failing init */ return -ENOENT; @@ -317,21 +261,31 @@ static int __init mod_init(void) { FATAL("Unable to hook page_add_new_anon_rmap"); unhook_all(); - unpatch_syscall_table(); + fh_remove_hooks(syscall_hooks, ARRAY_SIZE(syscall_hooks)); /* undo the exit_group hook installed above before failing init */ return -ENOENT; } + // return 0; + if (!try_hook("do_exit", &exit_hook)) { FATAL("Unable to hook do_exit"); unhook_all(); - unpatch_syscall_table(); + fh_remove_hooks(syscall_hooks, ARRAY_SIZE(syscall_hooks)); /* undo the exit_group hook installed above before failing init */ return -ENOENT; } + // if (!try_hook("finish_fault", &finish_fault_hook)) { + // FATAL("Unable to hook handle_pte_fault"); + + // unhook_all(); + // // unpatch_syscall_table(); + // return -ENOENT; + // } + // initialize snapshot non-exported funcs return snapshot_initialize_k_funcs(); @@ -349,7 +303,7 @@ static void __exit mod_exit(void) { unregister_chrdev(mod_major_num, DEVICE_NAME); unhook_all(); - unpatch_syscall_table(); + 
fh_remove_hooks(syscall_hooks, ARRAY_SIZE(syscall_hooks)); } diff --git a/src/snapshot.c b/src/snapshot.c index faa32f3..703c86b 100644 --- a/src/snapshot.c +++ b/src/snapshot.c @@ -3,8 +3,8 @@ #include "task_data.h" #include "snapshot.h" -int exit_hook(struct kprobe *p, struct pt_regs *regs) { - +int exit_hook(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, ftrace_regs_ptr regs) { clean_snapshot(); return 0; @@ -44,7 +44,7 @@ int take_snapshot(int config) { initialize_snapshot(data, config); take_memory_snapshot(data); - take_files_snapshot(data); + // take_files_snapshot(data); return 1; @@ -74,7 +74,7 @@ void restore_snapshot(struct task_data *data) { recover_threads_snapshot(data); recover_memory_snapshot(data); - recover_files_snapshot(data); + // recover_files_snapshot(data); recover_state(data); } @@ -87,7 +87,6 @@ void recover_snapshot(void) { } int exit_snapshot(void) { - struct task_data *data = get_task_data(current); if (data && (data->config & AFL_SNAPSHOT_EXIT) && have_snapshot(data)) { @@ -108,7 +107,7 @@ void clean_snapshot(void) { if (!data) { return; } clean_memory_snapshot(data); - clean_files_snapshot(data); + // clean_files_snapshot(data); clear_snapshot(data); remove_task_data(data); diff --git a/src/snapshot.h b/src/snapshot.h index afb03f2..3fce9f0 100644 --- a/src/snapshot.h +++ b/src/snapshot.h @@ -37,7 +37,9 @@ #include #include #include +#undef MODULE #include +#define MODULE 1 #include #include #include @@ -85,15 +87,20 @@ #include #include +#undef MODULE #include #include #include #include -#include + #include +#include +#define MODULE 1 #include +#include #include "afl_snapshot.h" +#include "ftrace_util.h" struct task_data; @@ -121,9 +128,12 @@ struct snapshot_page { bool has_been_copied; bool has_had_pte; bool dirty; + bool in_dirty_list; struct hlist_node next; + struct list_head dirty_list; + }; #define SNAPSHOT_PRIVATE 0x00000001 @@ -179,6 +189,8 @@ struct snapshot { DECLARE_HASHTABLE(ss_page, 
SNAPSHOT_HASHTABLE_SZ); + struct list_head dirty_pages; + }; #define SNAPSHOT_NONE 0x00000000 // outside snapshot @@ -205,9 +217,12 @@ void recover_threads_snapshot(struct task_data *data); int snapshot_initialize_k_funcs(void); -int wp_page_hook(struct kprobe *p, struct pt_regs *regs); -int do_anonymous_hook(struct kprobe *p, struct pt_regs *regs); -int exit_hook(struct kprobe *p, struct pt_regs *regs); +int wp_page_hook(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, ftrace_regs_ptr regs); +int do_anonymous_hook(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, ftrace_regs_ptr regs); +int exit_hook(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, ftrace_regs_ptr regs); int take_snapshot(int config); void recover_snapshot(void); diff --git a/src/task_data.c b/src/task_data.c index 01c21c0..08dd62c 100644 --- a/src/task_data.c +++ b/src/task_data.c @@ -1,5 +1,6 @@ #include "task_data.h" #include +#include "debug.h" LIST_HEAD(task_datas); static spinlock_t task_datas_lock; @@ -7,12 +8,25 @@ static spinlock_t task_datas_lock; static void task_data_free_callback(struct rcu_head *rcu) { struct task_data *data = container_of(rcu, struct task_data, rcu); - kfree(data); + struct vmrange_node *n = data->blocklist; + while (n) { + data->blocklist = n->next; + kfree(n); + n = data->blocklist; + } + data->blocklist = NULL; + n = data->allowlist; + while (n) { + data->allowlist = n->next; + kfree(n); + n = data->allowlist; + } + data->allowlist = NULL; + kfree(data); } struct task_data *get_task_data(const struct task_struct *tsk) { - struct task_data *data = NULL; rcu_read_lock(); @@ -39,10 +53,12 @@ struct task_data *ensure_task_data(const struct task_struct *tsk) { if (data) return data; // XXX: this is academic code (tm) so if we run out of memory, too bad! + // TODO: Not sure if this is still the case? should be freed correctly. 
data = kmalloc(sizeof(struct task_data), GFP_KERNEL | __GFP_ZERO); if (!data) return NULL; data->tsk = tsk; + INIT_LIST_HEAD(&data->ss.dirty_pages); spin_lock(&task_datas_lock); list_add_rcu(&data->list, &task_datas); diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..3e2a1f9 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,2 @@ +test3: test3.c Makefile + gcc -I ../include -g ../lib/libaflsnapshot.o test3.c -o test3 \ No newline at end of file diff --git a/test/test2 b/test/test2 new file mode 100755 index 0000000..04559b7 Binary files /dev/null and b/test/test2 differ diff --git a/test/test2.c b/test/test2.c new file mode 100644 index 0000000..e4edb15 --- /dev/null +++ b/test/test2.c @@ -0,0 +1,61 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libaflsnapshot.h" + +// gcc -I ../include -g ../lib/libaflsnapshot.o test2.c -o test2 + +int* shm_addr; +int* none_addr; + +int pippo = 1; + +void test2() { + + if (afl_snapshot_take(AFL_SNAPSHOT_NOSTACK) == 1) + fprintf(stderr, "first time!\n"); + +loop: + + *none_addr += 1; + *shm_addr += 1; + fprintf(stderr, ">> %d %p = %d %p = %d\n", pippo, none_addr, *none_addr, shm_addr, *shm_addr); + ++pippo; + + afl_snapshot_restore(); + goto loop; + +} + +int main() { + + afl_snapshot_init(); + + shm_addr = mmap(0, 0x10000, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_SHARED | MAP_ANONYMOUS, 0, 0); + + none_addr = mmap((void *)0, 0x1000, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + afl_snapshot_exclude_vmrange((unsigned long)none_addr, (unsigned long)(none_addr + (0x1000/4))); + afl_snapshot_include_vmrange((unsigned long)shm_addr, (unsigned long)(shm_addr + (0x10000/4))); + + *shm_addr = 0; + + test2(); + + return 0; + +} + + diff --git a/test/test3.c b/test/test3.c new file mode 100644 index 0000000..62fd757 --- /dev/null +++ b/test/test3.c @@ -0,0 +1,213 @@ +#define _GNU_SOURCE +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libaflsnapshot.h" + +#define PAGE_SIZE 0x1000 + +#define PAGE_ALIGN(addr) ((uint64_t)addr & (~(PAGE_SIZE-1))) + +#define NUM_PAGES 0x2 + +#define NUM_WRITES 0x4 + +#define NUM_LOOPS 0x2 + +#define PRE_FORK_BASE 0x30000000 +#define POST_FORK_BASE 0x40000000 +#define POST_SNAP_BASE 0x50000000 + +void* pre_fork_pages[NUM_PAGES] = {0}; +void* post_fork_pages[NUM_PAGES] = {0}; +void* post_snapshot[NUM_PAGES] = {0}; + +// gcc -I ../include -g ../lib/libaflsnapshot.o test2.c -o test2 + +void randomly_map(void* pages[], void* base) { + for (int i = 0; i < NUM_PAGES; i++) { + void* fixed_addr = base + i * PAGE_SIZE; + void* addr = mmap((void*)fixed_addr, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0, 0); + if (addr != fixed_addr) { + fprintf(stderr, "Failed to map page to %p\n", fixed_addr); + exit(1); + } + memset(addr, 0x42, PAGE_SIZE); + pages[i] = addr; + } +} + +void unmap_pages(void* pages[]) { + for (int i = 0; i < NUM_PAGES; i++) { + void* addr = pages[i]; + munmap(addr, PAGE_SIZE); + } +} + +void randomly_incl_excl(void* pages[]) { + for (int i = 0; i < NUM_PAGES; i++) { + long int result = random(); + void* addr = pages[i]; + if (i % 2 == 1) { + afl_snapshot_exclude_vmrange(addr, addr + PAGE_SIZE); + } else { + afl_snapshot_include_vmrange(addr, addr + PAGE_SIZE); + } + } +} + +void random_write() +{ + int rand_idx = random() % NUM_PAGES; + int rand_page_off = random() % (PAGE_SIZE / 8); // we write a qword + int rand_arr = random() % 3; + void* page = 0; + switch (rand_arr) + { + case 0: + page = pre_fork_pages[rand_idx]; + break; + case 1: + page = post_fork_pages[rand_idx]; + break; + case 2: + page = post_snapshot[rand_idx]; + break; + default: + fprintf(stderr, "DAFUQ: %d\n", rand_arr); + break; + } + void* rand_loc = page + rand_page_off; + *((uint64_t*)rand_loc) = 0x6969696969696969; 
+} + +void random_writes() +{ + for (int j = 0; j < NUM_WRITES; j++) { + random_write(); + } +} + +void check_page(void* page) +{ + uint64_t* conts = (uint64_t*)page; + if (*conts != 0x4242424242424242) { + fprintf(stderr, "ERROR (%p) not correctly restored: %p\n", page, *conts); + } +} + +void test3() { + + if (afl_snapshot_take(AFL_SNAPSHOT_BLOCK | AFL_SNAPSHOT_FDS) == 1) + fprintf(stderr, "first time!\n"); + + for (int i = 0; i < NUM_LOOPS; i++) { + if (afl_snapshot_take(AFL_SNAPSHOT_BLOCK | AFL_SNAPSHOT_FDS) == 1) + fprintf(stderr, "first time!\n"); + printf("Current Loop: %d\n", i); + random_writes(); + afl_snapshot_restore(); + random_writes(); + afl_snapshot_restore(); + afl_snapshot_restore(); + random_write(); + } +} + +void not_random_writes(void* pages[]) +{ + for (int j = 0; j < NUM_PAGES; j++) { + void* addr = pages[j]; + *(uint64_t*)addr = 0x6868686868686868; + } +} + +void not_random_writes_all() +{ + not_random_writes(pre_fork_pages); + not_random_writes(post_fork_pages); + not_random_writes(post_snapshot); +} + +void test3b() { + if (afl_snapshot_take(AFL_SNAPSHOT_BLOCK | AFL_SNAPSHOT_FDS) == 1) + fprintf(stderr, "first time!\n"); + + for (int i = 0; i < NUM_LOOPS; i++) { + if (afl_snapshot_take(AFL_SNAPSHOT_BLOCK | AFL_SNAPSHOT_FDS) == 1) + fprintf(stderr, "first time!\n"); + printf("Current Loop: %d\n", i); + not_random_writes_all(); + afl_snapshot_restore(); + check_page(pre_fork_pages[0]); + not_random_writes_all(); + afl_snapshot_restore(); + afl_snapshot_restore(); + not_random_writes_all(); + } +} + +void print_maps() +{ + printf("\nMAPS\n\n"); + int fd = open("/proc/self/maps", O_RDONLY); + char buf[4096]; + int size = 1; + while (size > 0) { + size = read(fd, buf, 4096); + write(2, buf, size); + } + printf("\n\n"); +} + +int main() { + randomly_map(pre_fork_pages, PRE_FORK_BASE); + printf("First random page: %p\n", pre_fork_pages[0]); + pid_t pid = fork(); + if (pid == 0) { + printf("In child!\n"); + print_maps(); + afl_snapshot_init(); + 
randomly_map(post_fork_pages, POST_FORK_BASE); + randomly_incl_excl(pre_fork_pages); + randomly_incl_excl(post_fork_pages); + void* data_start = &pre_fork_pages[0]; + void* data_end = &post_snapshot[NUM_PAGES-1]; + printf("DATA: %p - %p\n", data_start, data_end); + data_start = PAGE_ALIGN(data_start); + data_end = PAGE_ALIGN(data_end) + PAGE_SIZE; + printf("DATA: %p - %p\n", data_start, data_end); + afl_snapshot_exclude_vmrange(data_start, data_end); + if (afl_snapshot_take(AFL_SNAPSHOT_NOSTACK) == 1) + fprintf(stderr, "first time!\n"); + randomly_map(post_snapshot, POST_SNAP_BASE); + randomly_incl_excl(post_snapshot); + test3b(); + unmap_pages(pre_fork_pages); + // test3b(); + // void* first_addr = pre_fork_pages[0]; + // *((uint64_t*)first_addr) = 0x6868686868686868; + // print_maps(); + // test3(); + } else { + printf("In parent, waiting on child...\n"); + int status = 0; + waitpid(pid, &status, 0); + printf("Child exited: %d, going as well\n", status); + } + + return 0; +} + +