/* * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace * Copyright (c) 2014-2015 Andrew Lutomirski * * This program is free software; you can redistribute it and/or modify * it under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. * * This program is distributed in the hope it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * This is a series of tests that exercises the sigreturn(2) syscall and * the IRET / SYSRET paths in the kernel. * * For now, this focuses on the effects of unusual CS and SS values, * and it has a bunch of tests to make sure that ESP/RSP is restored * properly. * * The basic idea behind these tests is to raise(SIGUSR1) to create a * sigcontext frame, plug in the values to be tested, and then return, * which implicitly invokes sigreturn(2) and programs the user context * as desired. * * For tests for which we expect sigreturn and the subsequent return to * user mode to succeed, we return to a short trampoline that generates * SIGTRAP so that the meat of the tests can be ordinary C code in a * SIGTRAP handler. * * The inner workings of each test is documented below. * * Do not run on outdated, unpatched kernels at risk of nasty crashes. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * In principle, this test can run on Linux emulation layers (e.g. * Illumos "LX branded zones"). Solaris-based kernels reserve LDT * entries 0-5 for their own internal purposes, so start our LDT * allocations above that reservation. (The tests don't pass on LX * branded zones, but at least this lets them run.) */ #define LDT_OFFSET 6 /* An aligned stack accessible through some of our segments. */ static unsigned char stack16[65536] __attribute__((aligned(4096))); /* * An aligned int3 instruction used as a trampoline. Some of the tests * want to fish out their ss values, so this trampoline copies ss to eax * before the int3. */ asm (".pushsection .text\n\t" ".type int3, @function\n\t" ".align 4096\n\t" "int3:\n\t" "mov %ss,%eax\n\t" "int3\n\t" ".size int3, . - int3\n\t" ".align 4096, 0xcc\n\t" ".popsection"); extern char int3[4096]; /* * At startup, we prepapre: * * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero * descriptor or out of bounds). * - code16_sel: A 16-bit LDT code segment pointing to int3. * - data16_sel: A 16-bit LDT data segment pointing to stack16. * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3. * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16. * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16. * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to * stack16. * * For no particularly good reason, xyz_sel is a selector value with the * RPL and LDT bits filled in, whereas xyz_idx is just an index into the * descriptor table. These variables will be zero if their respective * segments could not be allocated. */ static unsigned short ldt_nonexistent_sel; static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel; static unsigned short gdt_data16_idx, gdt_npdata32_idx; static unsigned short GDT3(int idx) { return (idx << 3) | 3; } static unsigned short LDT3(int idx) { return (idx << 3) | 7; } /* Our sigaltstack scratch space. */ static char altstack_data[SIGSTKSZ]; static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_sigaction = handler; sa.sa_flags = SA_SIGINFO | flags; sigemptyset(&sa.sa_mask); if (sigaction(sig, &sa, 0)) err(1, "sigaction"); } static void clearhandler(int sig) { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_handler = SIG_DFL; sigemptyset(&sa.sa_mask); if (sigaction(sig, &sa, 0)) err(1, "sigaction"); } static void add_ldt(const struct user_desc *desc, unsigned short *var, const char *name) { if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) { *var = LDT3(desc->entry_number); } else { printf("[NOTE]\tFailed to create %s segment\n", name); *var = 0; } } static void setup_ldt(void) { if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16)) errx(1, "stack16 is too high\n"); if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3)) errx(1, "int3 is too high\n"); ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2); const struct user_desc code16_desc = { .entry_number = LDT_OFFSET + 0, .base_addr = (unsigned long)int3, .limit = 4095, .seg_32bit = 0, .contents = 2, /* Code, not conforming */ .read_exec_only = 0, .limit_in_pages = 0, .seg_not_present = 0, .useable = 0 }; add_ldt(&code16_desc, &code16_sel, "code16"); const struct user_desc data16_desc = { .entry_number = LDT_OFFSET + 1, .base_addr = (unsigned long)stack16, .limit = 0xffff, .seg_32bit = 0, .contents = 0, /* Data, grow-up */ .read_exec_only = 0, .limit_in_pages = 0, .seg_not_present = 0, .useable = 0 }; add_ldt(&data16_desc, &data16_sel, "data16"); const struct user_desc npcode32_desc = { .entry_number = LDT_OFFSET + 3, .base_addr = (unsigned long)int3, .limit = 4095, .seg_32bit = 1, .contents = 2, /* Code, not conforming */ .read_exec_only = 0, .limit_in_pages = 0, .seg_not_present = 1, .useable = 0 }; add_ldt(&npcode32_desc, &npcode32_sel, "npcode32"); const struct user_desc npdata32_desc = { .entry_number = LDT_OFFSET + 4, .base_addr = (unsigned long)stack16, .limit = 0xffff, .seg_32bit = 1, .contents = 0, /* Data, grow-up */ .read_exec_only = 0, .limit_in_pages = 0, .seg_not_present = 1, .useable = 0 }; add_ldt(&npdata32_desc, &npdata32_sel, "npdata32"); struct user_desc gdt_data16_desc = { .entry_number = -1, .base_addr = (unsigned long)stack16, .limit = 0xffff, .seg_32bit = 0, .contents = 0, /* Data, grow-up */ .read_exec_only = 0, .limit_in_pages = 0, .seg_not_present = 0, .useable = 0 }; if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { /* * This probably indicates vulnerability to CVE-2014-8133. * Merely getting here isn't definitive, though, and we'll * diagnose the problem for real later on. */ printf("[WARN]\tset_thread_area allocated data16 at index %d\n", gdt_data16_desc.entry_number); gdt_data16_idx = gdt_data16_desc.entry_number; } else { printf("[OK]\tset_thread_area refused 16-bit data\n"); } struct user_desc gdt_npdata32_desc = { .entry_number = -1, .base_addr = (unsigned long)stack16, .limit = 0xffff, .seg_32bit = 1, .contents = 0, /* Data, grow-up */ .read_exec_only = 0, .limit_in_pages = 0, .seg_not_present = 1, .useable = 0 }; if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) { /* * As a hardening measure, newer kernels don't allow this. */ printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n", gdt_npdata32_desc.entry_number); gdt_npdata32_idx = gdt_npdata32_desc.entry_number; } else { printf("[OK]\tset_thread_area refused 16-bit data\n"); } } /* State used by our signal handlers. */ static gregset_t initial_regs, requested_regs, resulting_regs; /* Instructions for the SIGUSR1 handler. */ static volatile unsigned short sig_cs, sig_ss; static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; /* Abstractions for some 32-bit vs 64-bit differences. */ #ifdef __x86_64__ # define REG_IP REG_RIP # define REG_SP REG_RSP # define REG_AX REG_RAX struct selectors { unsigned short cs, gs, fs, ss; }; static unsigned short *ssptr(ucontext_t *ctx) { struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; return &sels->ss; } static unsigned short *csptr(ucontext_t *ctx) { struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; return &sels->cs; } #else # define REG_IP REG_EIP # define REG_SP REG_ESP # define REG_AX REG_EAX static greg_t *ssptr(ucontext_t *ctx) { return &ctx->uc_mcontext.gregs[REG_SS]; } static greg_t *csptr(ucontext_t *ctx) { return &ctx->uc_mcontext.gregs[REG_CS]; } #endif /* Number of errors in the current test case. */ static volatile sig_atomic_t nerrs; /* * SIGUSR1 handler. Sets CS and SS as requested and points IP to the * int3 trampoline. Sets SP to a large known value so that we can see * whether the value round-trips back to user mode correctly. */ static void sigusr1(int sig, siginfo_t *info, void *ctx_void) { ucontext_t *ctx = (ucontext_t*)ctx_void; memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); *csptr(ctx) = sig_cs; *ssptr(ctx) = sig_ss; ctx->uc_mcontext.gregs[REG_IP] = sig_cs == code16_sel ? 0 : (unsigned long)&int3; ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; ctx->uc_mcontext.gregs[REG_AX] = 0; memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); requested_regs[REG_AX] = *ssptr(ctx); /* The asm code does this. */ return; } /* * Called after a successful sigreturn. Restores our state so that * the original raise(SIGUSR1) returns. */ static void sigtrap(int sig, siginfo_t *info, void *ctx_void) { ucontext_t *ctx = (ucontext_t*)ctx_void; sig_err = ctx->uc_mcontext.gregs[REG_ERR]; sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; unsigned short ss; asm ("mov %%ss,%0" : "=r" (ss)); greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX]; if (asm_ss != sig_ss && sig == SIGTRAP) { /* Sanity check failure. */ printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", ss, *ssptr(ctx), (unsigned long long)asm_ss); nerrs++; } memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); sig_trapped = sig; } /* * Checks a given selector for its code bitness or returns -1 if it's not * a usable code segment selector. */ int cs_bitness(unsigned short cs) { uint32_t valid = 0, ar; asm ("lar %[cs], %[ar]\n\t" "jnz 1f\n\t" "mov $1, %[valid]\n\t" "1:" : [ar] "=r" (ar), [valid] "+rm" (valid) : [cs] "r" (cs)); if (!valid) return -1; bool db = (ar & (1 << 22)); bool l = (ar & (1 << 21)); if (!(ar & (1<<11))) return -1; /* Not code. */ if (l && !db) return 64; else if (!l && db) return 32; else if (!l && !db) return 16; else return -1; /* Unknown bitness. */ } /* Finds a usable code segment of the requested bitness. */ int find_cs(int bitness) { unsigned short my_cs; asm ("mov %%cs,%0" : "=r" (my_cs)); if (cs_bitness(my_cs) == bitness) return my_cs; if (cs_bitness(my_cs + (2 << 3)) == bitness) return my_cs + (2 << 3); if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness) return my_cs - (2 << 3); if (cs_bitness(code16_sel) == bitness) return code16_sel; printf("[WARN]\tCould not find %d-bit CS\n", bitness); return -1; } static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) { int cs = find_cs(cs_bits); if (cs == -1) { printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n", cs_bits, use_16bit_ss ? 16 : 32); return 0; } if (force_ss != -1) { sig_ss = force_ss; } else { if (use_16bit_ss) { if (!data16_sel) { printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n", cs_bits); return 0; } sig_ss = data16_sel; } else { asm volatile ("mov %%ss,%0" : "=r" (sig_ss)); } } sig_cs = cs; printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n", cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss, (sig_ss & 4) ? "" : ", GDT"); raise(SIGUSR1); nerrs = 0; /* * Check that each register had an acceptable value when the * int3 trampoline was invoked. */ for (int i = 0; i < NGREG; i++) { greg_t req = requested_regs[i], res = resulting_regs[i]; if (i == REG_TRAPNO || i == REG_IP) continue; /* don't care */ if (i == REG_SP) { /* * If we were using a 16-bit stack segment, then * the kernel is a bit stuck: IRET only restores * the low 16 bits of ESP/RSP if SS is 16-bit. * The kernel uses a hack to restore bits 31:16, * but that hack doesn't help with bits 63:32. * On Intel CPUs, bits 63:32 end up zeroed, and, on * AMD CPUs, they leak the high bits of the kernel * espfix64 stack pointer. There's very little that * the kernel can do about it. * * Similarly, if we are returning to a 32-bit context, * the CPU will often lose the high 32 bits of RSP. */ if (res == req) continue; if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) { printf("[NOTE]\tSP: %llx -> %llx\n", (unsigned long long)req, (unsigned long long)res); continue; } printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n", (unsigned long long)requested_regs[i], (unsigned long long)resulting_regs[i]); nerrs++; continue; } bool ignore_reg = false; #if __i386__ if (i == REG_UESP) ignore_reg = true; #else if (i == REG_CSGSFS) { struct selectors *req_sels = (void *)&requested_regs[REG_CSGSFS]; struct selectors *res_sels = (void *)&resulting_regs[REG_CSGSFS]; if (req_sels->cs != res_sels->cs) { printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n", req_sels->cs, res_sels->cs); nerrs++; } if (req_sels->ss != res_sels->ss) { printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n", req_sels->ss, res_sels->ss); nerrs++; } continue; } #endif /* Sanity check on the kernel */ if (i == REG_AX && requested_regs[i] != resulting_regs[i]) { printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", (unsigned long long)requested_regs[i], (unsigned long long)resulting_regs[i]); nerrs++; continue; } if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", i, (unsigned long long)requested_regs[i], (unsigned long long)resulting_regs[i]); nerrs++; } } if (nerrs == 0) printf("[OK]\tall registers okay\n"); return nerrs; } static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) { int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs; if (cs == -1) return 0; sig_cs = cs; sig_ss = ss; printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n", cs_bits, sig_cs, sig_ss); sig_trapped = 0; raise(SIGUSR1); if (sig_trapped) { char errdesc[32] = ""; if (sig_err) { const char *src = (sig_err & 1) ? " EXT" : ""; const char *table; if ((sig_err & 0x6) == 0x0) table = "GDT"; else if ((sig_err & 0x6) == 0x4) table = "LDT"; else if ((sig_err & 0x6) == 0x2) table = "IDT"; else table = "???"; sprintf(errdesc, "%s%s index %d, ", table, src, sig_err >> 3); } char trapname[32]; if (sig_trapno == 13) strcpy(trapname, "GP"); else if (sig_trapno == 11) strcpy(trapname, "NP"); else if (sig_trapno == 12) strcpy(trapname, "SS"); else if (sig_trapno == 32) strcpy(trapname, "IRET"); /* X86_TRAP_IRET */ else sprintf(trapname, "%d", sig_trapno); printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n", trapname, (unsigned long)sig_err, errdesc, strsignal(sig_trapped)); return 0; } else { printf("[FAIL]\tDid not get SIGSEGV\n"); return 1; } } int main() { int total_nerrs = 0; unsigned short my_cs, my_ss; asm volatile ("mov %%cs,%0" : "=r" (my_cs)); asm volatile ("mov %%ss,%0" : "=r" (my_ss)); setup_ldt(); stack_t stack = { .ss_sp = altstack_data, .ss_size = SIGSTKSZ, }; if (sigaltstack(&stack, NULL) != 0) err(1, "sigaltstack"); sethandler(SIGUSR1, sigusr1, 0); sethandler(SIGTRAP, sigtrap, SA_ONSTACK); /* Easy cases: return to a 32-bit SS in each possible CS bitness. */ total_nerrs += test_valid_sigreturn(64, false, -1); total_nerrs += test_valid_sigreturn(32, false, -1); total_nerrs += test_valid_sigreturn(16, false, -1); /* * Test easy espfix cases: return to a 16-bit LDT SS in each possible * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. * * This catches the original missing-espfix-on-64-bit-kernels issue * as well as CVE-2014-8134. */ total_nerrs += test_valid_sigreturn(64, true, -1); total_nerrs += test_valid_sigreturn(32, true, -1); total_nerrs += test_valid_sigreturn(16, true, -1); if (gdt_data16_idx) { /* * For performance reasons, Linux skips espfix if SS points * to the GDT. If we were able to allocate a 16-bit SS in * the GDT, see if it leaks parts of the kernel stack pointer. * * This tests for CVE-2014-8133. */ total_nerrs += test_valid_sigreturn(64, true, GDT3(gdt_data16_idx)); total_nerrs += test_valid_sigreturn(32, true, GDT3(gdt_data16_idx)); total_nerrs += test_valid_sigreturn(16, true, GDT3(gdt_data16_idx)); } /* * We're done testing valid sigreturn cases. Now we test states * for which sigreturn itself will succeed but the subsequent * entry to user mode will fail. * * Depending on the failure mode and the kernel bitness, these * entry failures can generate SIGSEGV, SIGBUS, or SIGILL. */ clearhandler(SIGTRAP); sethandler(SIGSEGV, sigtrap, SA_ONSTACK); sethandler(SIGBUS, sigtrap, SA_ONSTACK); sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */ /* Easy failures: invalid SS, resulting in #GP(0) */ test_bad_iret(64, ldt_nonexistent_sel, -1); test_bad_iret(32, ldt_nonexistent_sel, -1); test_bad_iret(16, ldt_nonexistent_sel, -1); /* These fail because SS isn't a data segment, resulting in #GP(SS) */ test_bad_iret(64, my_cs, -1); test_bad_iret(32, my_cs, -1); test_bad_iret(16, my_cs, -1); /* Try to return to a not-present code segment, triggering #NP(SS). */ test_bad_iret(32, my_ss, npcode32_sel); /* * Try to return to a not-present but otherwise valid data segment. * This will cause IRET to fail with #SS on the espfix stack. This * exercises CVE-2014-9322. * * Note that, if espfix is enabled, 64-bit Linux will lose track * of the actual cause of failure and report #GP(0) instead. * This would be very difficult for Linux to avoid, because * espfix64 causes IRET failures to be promoted to #DF, so the * original exception frame is never pushed onto the stack. */ test_bad_iret(32, npdata32_sel, -1); /* * Try to return to a not-present but otherwise valid data * segment without invoking espfix. Newer kernels don't allow * this to happen in the first place. On older kernels, though, * this can trigger CVE-2014-9322. */ if (gdt_npdata32_idx) test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); return total_nerrs ? 1 : 0; }