User-Mode Instruction Prevention (UMIP), a feature present in recent Intel processors, prevents a group of instructions from being executed with CPL > 0. If one of these instructions is executed with CPL > 0, a general protection fault is issued.
Rather than relaying this fault to user space (in the form of a SIGSEGV signal), the instructions protected by UMIP can be emulated to provide dummy results. This preserves the current kernel behavior without revealing the system resources that UMIP intends to protect (the global descriptor and interrupt descriptor tables, the segment selectors of the local descriptor table and the task state segment, and the machine status word).
This emulation is needed because certain applications (e.g., WineHQ) rely on this subset of instructions to function.
The instructions protected by UMIP can be split into two groups: those that return a kernel memory address (sgdt and sidt) and those that return a value (sldt, str and smsw).
For the instructions that return a kernel memory address, the result is emulated as the location of a dummy variable in the kernel memory space. This is needed because applications such as WineHQ rely on the result being located in the kernel memory space in order to function. The limits of the GDT and the IDT are set to zero.
The instructions sldt and str return a segment selector relative to the base address of the global descriptor table. Since the actual address of that table is not revealed, it makes sense to emulate the result as zero.
The instruction smsw is emulated to return zero.
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Chen Yucong <slaoub@gmail.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Liang Z. Li <liang.z.li@intel.com>
Cc: Alexandre Julliard <julliard@winehq.org>
Cc: Stas Sergeev <stsp@list.ru>
Cc: x86@kernel.org
Cc: linux-msdos@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
 arch/x86/include/asm/umip.h |  16 +++
 arch/x86/kernel/Makefile    |   1 +
 arch/x86/kernel/umip.c      | 240 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 257 insertions(+)
 create mode 100644 arch/x86/include/asm/umip.h
 create mode 100644 arch/x86/kernel/umip.c

diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h
new file mode 100644
index 0000000..7bcaca6
--- /dev/null
+++ b/arch/x86/include/asm/umip.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_X86_UMIP_H
+#define _ASM_X86_UMIP_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+#include <asm/insn.h>
+
+#ifdef CONFIG_X86_INTEL_UMIP
+int fixup_umip_exception(struct pt_regs *regs);
+#else
+static inline int fixup_umip_exception(struct pt_regs *regs)
+{
+	return -EINVAL;
+}
+#endif /* CONFIG_X86_INTEL_UMIP */
+#endif /* _ASM_X86_UMIP_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 581386c..c4aec02 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -124,6 +124,7 @@ obj-$(CONFIG_EFI)			+= sysfb_efi.o
 obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
 obj-$(CONFIG_TRACING)			+= tracepoint.o
 obj-$(CONFIG_SCHED_MC_PRIO)		+= itmt.o
+obj-$(CONFIG_X86_INTEL_UMIP)		+= umip.o
 
 ifdef CONFIG_FRAME_POINTER
 obj-y					+= unwind_frame.o
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
new file mode 100644
index 0000000..a104aea
--- /dev/null
+++ b/arch/x86/kernel/umip.c
@@ -0,0 +1,240 @@
+/*
+ * umip.c Emulation for instruction protected by the Intel User-Mode
+ * Instruction Prevention. The instructions are:
+ *    sgdt
+ *    sldt
+ *    sidt
+ *    str
+ *    smsw
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ * Ricardo Neri <ricardo.neri@linux.intel.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/bug.h>
+#include <linux/uaccess.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <asm/ptrace.h>
+#include <asm/umip.h>
+#include <linux/thread_info.h>
+
+/*
+ * Only the addresses of these dummy variables are reported to user space, as
+ * the emulated base of the GDT and the IDT. Their contents are never used.
+ *
+ * NOTE(review): reporting the address of a kernel object may disclose the
+ * kernel memory layout to user space; confirm this is acceptable.
+ */
+static const unsigned long umip_dummy_gdt_base;
+static const unsigned long umip_dummy_idt_base;
+
+/* Sizes, in bytes, of the fields stored by SGDT and SIDT */
+#define UMIP_GDT_IDT_LIMIT_SIZE		2
+#define UMIP_GDT_IDT_BASE_SIZE_32BIT	4
+#define UMIP_GDT_IDT_BASE_SIZE_64BIT	8
+
+enum umip_insn {
+	UMIP_SGDT = 0,	/* opcode 0f 01 ModR/M reg 0 */
+	UMIP_SIDT,	/* opcode 0f 01 ModR/M reg 1 */
+	UMIP_SLDT,	/* opcode 0f 00 ModR/M reg 0 */
+	UMIP_SMSW,	/* opcode 0f 01 ModR/M reg 4 */
+	UMIP_STR,	/* opcode 0f 00 ModR/M reg 1 */
+};
+
+/**
+ * __identify_insn() - Identify an instruction protected by UMIP
+ * @insn:	Instruction to inspect; it is decoded up to its ModRM byte
+ *
+ * Return: the matching enum umip_insn value, or -EINVAL if @insn could not
+ * be decoded or is not an instruction protected by UMIP.
+ */
+static int __identify_insn(struct insn *insn)
+{
+	/* by getting modrm we also get the opcode */
+	insn_get_modrm(insn);
+
+	/* A truncated instruction has no ModRM byte to look at */
+	if (!insn->modrm.nbytes)
+		return -EINVAL;
+
+	/* All the instructions of interest have a two-byte opcode */
+	if (insn->opcode.nbytes != 2 || insn->opcode.bytes[0] != 0xf)
+		return -EINVAL;
+
+	switch (insn->opcode.bytes[1]) {
+	case 0x1:
+		switch (X86_MODRM_REG(insn->modrm.value)) {
+		case 0:
+			return UMIP_SGDT;
+		case 1:
+			return UMIP_SIDT;
+		case 4:
+			return UMIP_SMSW;
+		default:
+			return -EINVAL;
+		}
+	case 0x0:
+		switch (X86_MODRM_REG(insn->modrm.value)) {
+		case 0:
+			return UMIP_SLDT;
+		case 1:
+			return UMIP_STR;
+		default:
+			return -EINVAL;
+		}
+	default:
+		/* 0f xx, but not an instruction protected by UMIP */
+		return -EINVAL;
+	}
+}
+
+/**
+ * __emulate_umip_insn() - Build the dummy result of a UMIP-protected insn
+ * @insn:	Decoded instruction
+ * @umip_inst:	Instruction type, as returned by __identify_insn()
+ * @data:	Buffer that receives the result; at least 10 bytes long
+ * @data_size:	Set to the number of bytes of @data that hold the result
+ *
+ * Return: 0 on success, -EINVAL if the instruction cannot be emulated.
+ */
+static int __emulate_umip_insn(struct insn *insn, enum umip_insn umip_inst,
+			       unsigned char *data, int *data_size)
+{
+	unsigned short dummy_limit = 0;
+	unsigned short dummy_value = 0;
+	u64 dummy_base_addr;
+	int base_size;
+
+	switch (umip_inst) {
+	/*
+	 * These two instructions return the base address and limit of the
+	 * global and interrupt descriptor table. The base address can be
+	 * 32-bit or 64-bit. Limit is always 16-bit.
+	 */
+	case UMIP_SGDT:
+	case UMIP_SIDT:
+		/*
+		 * SGDT and SIDT only take a memory operand. User space
+		 * controls the bytes decoded here, so fail without a WARN.
+		 */
+		if (X86_MODRM_MOD(insn->modrm.value) == 3)
+			return -EINVAL;
+
+		if (umip_inst == UMIP_SGDT)
+			dummy_base_addr = (unsigned long)&umip_dummy_gdt_base;
+		else
+			dummy_base_addr = (unsigned long)&umip_dummy_idt_base;
+
+		/* 16-bit operand. fill most significant byte with zeros */
+		if (insn->opnd_bytes == 2)
+			dummy_base_addr &= 0xffffff;
+
+		/*
+		 * The base is 8 bytes long only in 64-bit mode. A 32-bit task
+		 * on a 64-bit kernel must get 6 bytes, not 10, or the bytes
+		 * that follow its operand in memory are overwritten.
+		 */
+		if (insn->x86_64)
+			base_size = UMIP_GDT_IDT_BASE_SIZE_64BIT;
+		else
+			base_size = UMIP_GDT_IDT_BASE_SIZE_32BIT;
+
+		memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
+		memcpy(data + UMIP_GDT_IDT_LIMIT_SIZE, &dummy_base_addr,
+		       base_size);
+		*data_size = UMIP_GDT_IDT_LIMIT_SIZE + base_size;
+		break;
+	/*
+	 * These three instructions return a 16-bit value. We return
+	 * all zeros. This is equivalent to a null descriptor for
+	 * str and sldt. For smsw, it is equivalent to an all-zero CR0.
+	 */
+	case UMIP_SLDT:
+	case UMIP_SMSW:
+	case UMIP_STR:
+		/* if operand is a register, it is zero-extended */
+		if (X86_MODRM_MOD(insn->modrm.value) == 3) {
+			memset(data, 0, insn->opnd_bytes);
+			*data_size = insn->opnd_bytes;
+		} else {
+			*data_size = sizeof(dummy_value);
+		}
+		memcpy(data, &dummy_value, sizeof(dummy_value));
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * fixup_umip_exception() - Emulate the UMIP-protected insn that faulted
+ * @regs:	Registers of the user-space task when the fault was raised
+ *
+ * Fetch and decode the instruction at @regs->ip. If it is protected by UMIP,
+ * write a dummy result to its destination operand and move @regs->ip past it.
+ *
+ * Return: 0 if the instruction was emulated, -EFAULT if user memory could not
+ * be read or written, -EINVAL if the instruction is not protected by UMIP or
+ * cannot be emulated.
+ */
+int fixup_umip_exception(struct pt_regs *regs)
+{
+	struct insn insn;
+	unsigned char buf[MAX_INSN_SIZE];
+	/* 10 bytes is the maximum size of the result of UMIP instructions */
+	unsigned char dummy_data[10] = { 0 };
+	/* TIF_IA32 is never set on a 32-bit kernel; do not decode as 64-bit */
+	int x86_64 = IS_ENABLED(CONFIG_X86_64) && !test_thread_flag(TIF_IA32);
+	int not_copied, nr_copied, reg_offset, dummy_data_size;
+	void __user *uaddr;
+	unsigned long *reg_addr;
+	/* int, not enum umip_insn: the enum may be unsigned and hide errors */
+	int umip_inst;
+
+	not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf));
+	nr_copied = sizeof(buf) - not_copied;
+	/*
+	 * The decoder _should_ fail nicely if we pass it a short buffer.
+	 * But, let's not depend on that implementation detail. If we
+	 * did not get anything, just error out now.
+	 */
+	if (!nr_copied)
+		return -EFAULT;
+
+	insn_init(&insn, buf, nr_copied, x86_64);
+	insn_get_length(&insn);
+	if (nr_copied < insn.length)
+		return -EFAULT;
+
+	umip_inst = __identify_insn(&insn);
+	/* Check if we found an instruction protected by UMIP */
+	if (umip_inst < 0)
+		return -EINVAL;
+
+	if (__emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
+		return -EINVAL;
+
+	/* If operand is a register, write directly to it */
+	if (X86_MODRM_MOD(insn.modrm.value) == 3) {
+		reg_offset = get_reg_offset_rm(&insn, regs);
+		/* NOTE(review): assumes a negative offset reports a failure */
+		if (reg_offset < 0)
+			return -EINVAL;
+
+		reg_addr = (unsigned long *)((unsigned long)regs + reg_offset);
+		memcpy(reg_addr, dummy_data, dummy_data_size);
+	} else {
+		uaddr = insn_get_addr_ref(&insn, regs);
+		/* copy_to_user() returns the number of bytes NOT copied */
+		if (copy_to_user(uaddr, dummy_data, dummy_data_size))
+			return -EFAULT;
+	}
+
+	/* increase IP to let the program keep going */
+	regs->ip += insn.length;
+	return 0;
+}