-
Notifications
You must be signed in to change notification settings - Fork 224
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor/optimize(bpf): rework bpf route with bpf_loop #580
Conversation
|
Guys could you set |
dae version d956bf1_optimize:_reduce_copy_route_params
go runtime go1.22.5 linux/amd64
Copyright (c) 2022-2024 @daeuniverse
License GNU AGPLv3 <https://github.com/daeuniverse/dae/blob/main/LICENSE>
/usr/bin/dae run --disable-timestamp -c /etc/dae/config.dae
FATAL 0: R1=ctx() R10=fp0
; int tproxy_lan_ingress(struct __sk_buff *skb) @ tproxy.c:1023
0: (bf) r6 = r1 ; R1=ctx() R6_w=ctx()
; if (get_link_h_len(skb->ifindex, &link_h_len)) @ tproxy.c:1035
1: (61) r1 = *(u32 *)(r6 +40) ; R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R6_w=ctx()
2: (63) *(u32 *)(r10 -112) = r1 ; R1_w=scalar(id=1,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R10=fp0 fp-112=????scalar(id=1,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
3: (bf) r2 = r10 ; R2_w=fp0 R10=fp0
; @ tproxy.c:0
4: (07) r2 += -112 ; R2_w=fp-112
; __u32 *plink_h_len = bpf_map_lookup_elem(&linklen_map, &ifindex); @ tproxy.c:919
5: (18) r1 = 0xffff8f6c56e9a800 ; R1_w=map_ptr(map=linklen_map,ks=4,vs=4)
7: (85) call bpf_map_lookup_elem#1 ; R0_w=map_value_or_null(id=2,map=linklen_map,ks=4,vs=4)
; if (!plink_h_len) @ tproxy.c:921
8: (55) if r0 != 0x0 goto pc+2 11: R0_w=map_value(map=linklen_map,ks=4,vs=4) R6_w=ctx() R10=fp0 fp-112=????mmmm
; *link_h_len = *plink_h_len; @ tproxy.c:923
11: (61) r1 = *(u32 *)(r0 +0) ; R0_w=map_value(map=linklen_map,ks=4,vs=4) R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
12: (63) *(u32 *)(r10 -236) = r1 ; R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R10=fp0 fp-240=mmmm????
; if (link_h_len == ETH_HLEN) { @ tproxy.c:538
13: (55) if r1 != 0xe goto pc+13 ; R1_w=14
14: (b7) r7 = 0 ; R7_w=0
15: (bf) r3 = r10 ; R3_w=fp0 R10=fp0
; ret = bpf_skb_load_bytes(skb, offset, ethh, @ tproxy.c:539
16: (07) r3 += -128 ; R3_w=fp-128
17: (b7) r9 = 14 ; R9_w=14
18: (bf) r1 = r6 ; R1_w=ctx() R6_w=ctx()
19: (b7) r2 = 0 ; R2_w=0
20: (b7) r4 = 14 ; R4_w=14
21: (85) call bpf_skb_load_bytes#26 ; R0=scalar() fp-120=??mmmmmm fp-128=mmmmmmmm
; if (ret) { @ tproxy.c:541
22: (67) r0 <<= 32 ; R0_w=scalar(smax=0x7fffffff00000000,umax=0xffffffff00000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xffffffff00000000))
23: (77) r0 >>= 32 ; R0_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
24: (15) if r0 == 0x0 goto pc+10 ; R0_w=0 R6=ctx() R7=0 R9=14 R10=fp0 fp-112=????mmmm fp-120=??mmmmmm fp-128=mmmmmmmm fp-240=mmmm????
; ethh->h_proto = skb->protocol; @ tproxy.c:549
35: (b7) r7 = 0 ; R7_w=0
; __builtin_memset(iph, 0, sizeof(struct iphdr)); @ tproxy.c:554
36: (63) *(u32 *)(r10 -136) = r7 ; R7_w=0 R10=fp0 fp-136=????0
37: (7b) *(u64 *)(r10 -144) = r7 ; R7_w=0 R10=fp0 fp-144_w=0
38: (7b) *(u64 *)(r10 -152) = r7 ; R7_w=0 R10=fp0 fp-152_w=0
; __builtin_memset(ipv6h, 0, sizeof(struct ipv6hdr)); @ tproxy.c:555
39: (7b) *(u64 *)(r10 -192) = r7 ; R7_w=0 R10=fp0 fp-192_w=0
40: (7b) *(u64 *)(r10 -184) = r7 ; R7_w=0 R10=fp0 fp-184_w=0
41: (7b) *(u64 *)(r10 -176) = r7 ; R7_w=0 R10=fp0 fp-176_w=0
42: (7b) *(u64 *)(r10 -168) = r7 ; R7_w=0 R10=fp0 fp-168_w=0
43: (7b) *(u64 *)(r10 -160) = r7 ; R7_w=0 R10=fp0 fp-160_w=0
; __builtin_memset(icmp6h, 0, sizeof(struct icmp6hdr)); @ tproxy.c:556
44: (7b) *(u64 *)(r10 -200) = r7 ; R7_w=0 R10=fp0 fp-200_w=0
; __builtin_memset(tcph, 0, sizeof(struct tcphdr)); @ tproxy.c:557
45: (7b) *(u64 *)(r10 -224) = r7 ; R7_w=0 R10=fp0 fp-224_w=0
46: (7b) *(u64 *)(r10 -216) = r7 ; R7_w=0 R10=fp0 fp-216_w=0
47: (63) *(u32 *)(r10 -208) = r7 ; R7_w=0 R10=fp0 fp-208=????0
; __builtin_memset(udph, 0, sizeof(struct udphdr)); @ tproxy.c:558
48: (7b) *(u64 *)(r10 -232) = r7 ; R7_w=0 R10=fp0 fp-232_w=0
49: (bf) r1 = r10 ; R1_w=fp0 R10=fp0
; @ tproxy.c:0
50: (07) r1 += -128 ; R1_w=fp-128
; if (ethh->h_proto == bpf_htons(ETH_P_IP)) { @ tproxy.c:563
51: (69) r2 = *(u16 *)(r1 +12) ; R1_w=fp-128 R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) fp-120=??mmmmmm
52: (55) if r2 != 0x8 goto pc+28 ; R2_w=8
53: (bf) r3 = r10 ; R3_w=fp0 R10=fp0
; ret = bpf_skb_load_bytes(skb, offset, iph, @ tproxy.c:564
54: (07) r3 += -152 ; R3_w=fp-152
55: (bf) r1 = r6 ; R1_w=ctx() R6=ctx()
56: (bf) r2 = r9 ; R2_w=14 R9=14
57: (b7) r4 = 20 ; R4_w=20
58: (85) call bpf_skb_load_bytes#26 ; R0=scalar() fp-136=????mmmm fp-144=mmmmmmmm fp-152=mmmmmmmm
; if (ret) @ tproxy.c:566
59: (67) r0 <<= 32 ; R0_w=scalar(smax=0x7fffffff00000000,umax=0xffffffff00000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xffffffff00000000))
60: (77) r0 >>= 32 ; R0_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
61: (55) if r0 != 0x0 goto pc-37 ; R0_w=0
62: (bf) r1 = r10 ; R1_w=fp0 R10=fp0
; offset += iph->ihl * 4; @ tproxy.c:569
63: (07) r1 += -152 ; R1_w=fp-152
64: (71) r2 = *(u8 *)(r1 +0) ; R1_w=fp-152 R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) fp-152=mmmmmmmm
65: (67) r2 <<= 2 ; R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=1020,var_off=(0x0; 0x3fc))
66: (57) r2 &= 60 ; R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=60,var_off=(0x0; 0x3c))
67: (0f) r9 += r2 ; R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=60,var_off=(0x0; 0x3c)) R9_w=scalar(smin=umin=smin32=umin32=14,smax=umax=smax32=umax32=74,var_off=(0x2; 0x7c))
; *l4proto = iph->protocol; @ tproxy.c:572
68: (71) r8 = *(u8 *)(r1 +9) ; R1_w=fp-152 R8_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) fp-144=mmmmmmmm
; switch (iph->protocol) { @ tproxy.c:573
69: (15) if r8 == 0x11 goto pc+49 ; R8_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff))
70: (55) if r8 != 0x6 goto pc-46 ; R8=6
71: (bf) r3 = r10 ; R3_w=fp0 R10=fp0
; ret = bpf_skb_load_bytes(skb, offset, tcph, @ tproxy.c:575
72: (07) r3 += -224 ; R3_w=fp-224
73: (bf) r1 = r6 ; R1_w=ctx() R6=ctx()
74: (bf) r2 = r9 ; R2_w=scalar(id=3,smin=umin=smin32=umin32=14,smax=umax=smax32=umax32=74,var_off=(0x2; 0x7c)) R9=scalar(id=3,smin=umin=smin32=umin32=14,smax=umax=smax32=umax32=74,var_off=(0x2; 0x7c))
75: (b7) r4 = 20 ; R4_w=20
76: (85) call bpf_skb_load_bytes#26 ; R0_w=scalar() fp-208=????mmmm fp-216=mmmmmmmm fp-224=mmmmmmmm
; if (ret) { @ tproxy.c:577
77: (67) r0 <<= 32 ; R0_w=scalar(smax=0x7fffffff00000000,umax=0xffffffff00000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xffffffff00000000))
78: (77) r0 >>= 32 ; R0=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
79: (15) if r0 == 0x0 goto pc+48 128: R0=0 R6=ctx() R7=0 R8=6 R9=scalar(id=3,smin=umin=smin32=umin32=14,smax=umax=smax32=umax32=74,var_off=(0x2; 0x7c)) R10=fp0 fp-112=????mmmm fp-120=??mmmmmm fp-128=mmmmmmmm fp-136=????mmmm fp-144=mmmmmmmm fp-152=mmmmmmmm fp-160=0 fp-168=0 fp-176=0 fp-184=0 fp-192=0 fp-200=0 fp-208=????mmmm fp-216=mmmmmmmm fp-224=mmmmmmmm fp-232=0 fp-240=mmmm????
; if (ret) { @ tproxy.c:585
128: (b7) r1 = 0 ; R1_w=0
129: (05) goto pc+196
; if (ret) { @ tproxy.c:1039
326: (67) r1 <<= 32 ; R1_w=0
327: (77) r1 >>= 32 ; R1_w=0
328: (55) if r1 != 0x0 goto pc-304 ; R1_w=0
329: (15) if r8 == 0x3a goto pc-305 ; R8=6
330: (b7) r1 = 0 ; R1_w=0
; __builtin_memset(tuples, 0, sizeof(*tuples)); @ tproxy.c:397
331: (7b) *(u64 *)(r10 -256) = r1 ; R1_w=0 R10=fp0 fp-256_w=0
332: (7b) *(u64 *)(r10 -248) = r1 ; R1_w=0 R10=fp0 fp-248_w=0
333: (7b) *(u64 *)(r10 -264) = r1 ; R1_w=0 R10=fp0 fp-264_w=0
334: (7b) *(u64 *)(r10 -272) = r1 ; R1_w=0 R10=fp0 fp-272_w=0
335: (7b) *(u64 *)(r10 -280) = r1 ; R1_w=0 R10=fp0 fp-280_w=0
336: (7b) *(u64 *)(r10 -288) = r1 ; R1_w=0 R10=fp0 fp-288_w=0
; tuples->five.l4proto = l4proto; @ tproxy.c:398
337: (73) *(u8 *)(r10 -252) = r8 ; R8=6 R10=fp0 fp-256_w=mmmmmmmm
; if (skb->protocol == bpf_htons(ETH_P_IP)) { @ tproxy.c:400
338: (61) r1 = *(u32 *)(r6 +16) ; R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R6=ctx()
339: (55) if r1 != 0x8 goto pc+13 ; R1_w=8
340: (18) r1 = 0xffff0000 ; R1_w=0xffff0000
; tuples->five.sip.u6_addr32[2] = bpf_htonl(0x0000ffff); @ tproxy.c:401
342: (63) *(u32 *)(r10 -280) = r1 ; R1_w=0xffff0000 R10=fp0 fp-280_w=mmmm0xffff0000
; tuples->five.dip.u6_addr32[2] = bpf_htonl(0x0000ffff); @ tproxy.c:404
343: (63) *(u32 *)(r10 -264) = r1 ; R1_w=0xffff0000 R10=fp0 fp-264_w=mmmm0xffff0000
344: (bf) r1 = r10 ; R1_w=fp0 R10=fp0
; tuples->five.sip.u6_addr32[2] = bpf_htonl(0x0000ffff); @ tproxy.c:401
345: (07) r1 += -152 ; R1_w=fp-152
; tuples->five.sip.u6_addr32[3] = iph->saddr; @ tproxy.c:402
346: (61) r2 = *(u32 *)(r1 +12) ; R1_w=fp-152 R2_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) fp-144=mmmmmmmm
347: (63) *(u32 *)(r10 -276) = r2 ; R2_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R10=fp0 fp-280_w=mmmmmmmm
; tuples->five.dip.u6_addr32[3] = iph->daddr; @ tproxy.c:405
348: (61) r2 = *(u32 *)(r1 +16) ; R1_w=fp-152 R2_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) fp-136=????mmmm
349: (63) *(u32 *)(r10 -260) = r2 ; R2_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R10=fp0 fp-264_w=mmmmmmmm
; return (iph->tos & 0xfc) >> 2; @ tproxy.c:384
350: (71) r2 = *(u8 *)(r1 +1) ; R1_w=fp-152 R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) fp-152=mmmmmmmm
351: (77) r2 >>= 2 ; R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=63,var_off=(0x0; 0x3f))
352: (05) goto pc+34
; @ tproxy.c:0
387: (73) *(u8 *)(r10 -248) = r2 ; R2=scalar(id=4,smin=smin32=0,smax=umax=smax32=umax32=63,var_off=(0x0; 0x3f)) R10=fp0 fp-248=mmmmmmmscalar(id=4,smin=smin32=0,smax=umax=smax32=umax32=63,var_off=(0x0; 0x3f))
; if (l4proto == IPPROTO_TCP) { @ tproxy.c:417
388: (55) if r8 != 0x6 goto pc+7 ; R8=6
389: (b7) r3 = 2 ; R3_w=2
390: (bf) r1 = r10 ; R1_w=fp0 R10=fp0
; } else { @ tproxy.c:420
391: (07) r1 += -224 ; R1_w=fp-224
392: (bf) r2 = r1 ; R1_w=fp-224 R2_w=fp-224
393: (0f) r2 += r3 ; R2_w=fp-222 R3_w=2
394: (b7) r3 = 0 ; R3_w=0
395: (05) goto pc+6
402: (0f) r1 += r3 ; R1_w=fp-224 R3=0
; @ tproxy.c:0
403: (69) r1 = *(u16 *)(r1 +0) ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) fp-224=mmmmmmmm
404: (6b) *(u16 *)(r10 -256) = r1 ; R1_w=scalar(id=5,smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) R10=fp0 fp-256=mmmmmmscalar(id=5,smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff))
405: (69) r1 = *(u16 *)(r2 +0) ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) R2=fp-222 fp-224=mmmmmmmm
406: (6b) *(u16 *)(r10 -254) = r1 ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) R10=fp0 fp-256=mmmmmmmm
407: (b7) r1 = 0 ; R1_w=0
; struct bpf_sock_tuple tuple = { 0 }; @ tproxy.c:1063
408: (63) *(u32 *)(r10 -296) = r1 ; R1_w=0 R10=fp0 fp-296=????0
409: (7b) *(u64 *)(r10 -304) = r1 ; R1_w=0 R10=fp0 fp-304_w=0
410: (7b) *(u64 *)(r10 -312) = r1 ; R1_w=0 R10=fp0 fp-312_w=0
411: (7b) *(u64 *)(r10 -320) = r1 ; R1_w=0 R10=fp0 fp-320_w=0
412: (7b) *(u64 *)(r10 -328) = r1 ; R1_w=0 R10=fp0 fp-328_w=0
; if (skb->protocol == bpf_htons(ETH_P_IP)) { @ tproxy.c:1067
413: (61) r1 = *(u32 *)(r6 +16) ; R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R6=ctx()
414: (55) if r1 != 0x8 goto pc+82 ; R1_w=8
415: (bf) r1 = r10 ; R1_w=fp0 R10=fp0
; tuple.ipv4.daddr = tuples.five.dip.u6_addr32[3]; @ tproxy.c:1068
416: (07) r1 += -328 ; R1_w=fp-328
417: (61) r2 = *(u32 *)(r10 -260) ; R2_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R10=fp0 fp-264=mmmmmmmm
418: (63) *(u32 *)(r1 +4) = r2 ; R1_w=fp-328 R2_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) fp-328_w=mmmmmmmm
; tuple.ipv4.saddr = tuples.five.sip.u6_addr32[3]; @ tproxy.c:1069
419: (61) r2 = *(u32 *)(r10 -276) ; R2_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R10=fp0 fp-280=mmmmmmmm
420: (63) *(u32 *)(r1 +0) = r2 ; R1_w=fp-328 R2_w=scalar(id=6,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) fp-328_w=mmmmscalar(id=6,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
; tuple.ipv4.dport = tuples.five.dport; @ tproxy.c:1070
421: (69) r2 = *(u16 *)(r10 -254) ; R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) R10=fp0 fp-256=mmmmmmmm
422: (6b) *(u16 *)(r1 +10) = r2 ; R1_w=fp-328 R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) fp-320_w=mmmmmmmm
; tuple.ipv4.sport = tuples.five.sport; @ tproxy.c:1071
423: (69) r2 = *(u16 *)(r10 -256) ; R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) R10=fp0 fp-256=mmmmmmmm
424: (6b) *(u16 *)(r1 +8) = r2 ; R1_w=fp-328 R2_w=scalar(id=7,smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) fp-320_w=mmmmmmscalar(id=7,smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff))
425: (b7) r3 = 12 ; R3_w=12
; if (l4proto == IPPROTO_TCP) { @ tproxy.c:1083
426: (55) if r8 != 0x6 goto pc+1 ; R8=6
427: (05) goto pc+101
529: (bf) r1 = r10 ; R1_w=fp0 R10=fp0
; if (tcph.syn && !tcph.ack) @ tproxy.c:1085
530: (07) r1 += -224 ; R1_w=fp-224
531: (69) r1 = *(u16 *)(r1 +12) ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) fp-216=mmmmmmmm
532: (57) r1 &= 512 ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=512,var_off=(0x0; 0x200))
533: (15) if r1 == 0x0 goto pc+5 ; R1_w=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=512,var_off=(0x0; 0x200))
534: (bf) r1 = r10 ; R1_w=fp0 R10=fp0
535: (07) r1 += -224 ; R1_w=fp-224
536: (69) r1 = *(u16 *)(r1 +12) ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) fp-216=mmmmmmmm
537: (57) r1 &= 4096 ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=4096,var_off=(0x0; 0x1000))
538: (15) if r1 == 0x0 goto pc+13 ; R1_w=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=4096,var_off=(0x0; 0x1000))
; PARAM.dae_netns_id, 0); @ tproxy.c:1089
539: (18) r1 = 0xffff8f6e250b2af8 ; R1_w=map_value(map=.rodata,ks=4,vs=32)
541: (61) r4 = *(u32 *)(r1 +12) ; R1_w=map_value(map=.rodata,ks=4,vs=32) R4_w=0
542: (bf) r2 = r10 ; R2_w=fp0 R10=fp0
543: (07) r2 += -328 ; R2_w=fp-328
; sk = bpf_skc_lookup_tcp(skb, &tuple, tuple_size, @ tproxy.c:1088
544: (bf) r1 = r6 ; R1_w=ctx() R6=ctx()
545: (b7) r5 = 0 ; R5_w=0
546: (85) call bpf_skc_lookup_tcp#99 ; R0_w=sock_common_or_null(id=9,ref_obj_id=9) refs=9
; if (sk) { @ tproxy.c:1090
547: (15) if r0 == 0x0 goto pc+4 ; R0_w=sock_common(ref_obj_id=9) refs=9
; if (sk->state != BPF_TCP_LISTEN) { @ tproxy.c:1091
548: (61) r7 = *(u32 *)(r0 +72) ; R0_w=sock_common(ref_obj_id=9) R7_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) refs=9
; @ tproxy.c:0
549: (bf) r1 = r0 ; R0_w=sock_common(ref_obj_id=9) R1_w=sock_common(ref_obj_id=9) refs=9
550: (85) call bpf_sk_release#86 ; R0=scalar()
; if (sk->state != BPF_TCP_LISTEN) { @ tproxy.c:1091
551: (55) if r7 != 0xa goto pc+146 ; R7=10
552: (bf) r1 = r10 ; R1_w=fp0 R10=fp0
; @ tproxy.c:0
553: (07) r1 += -224 ; R1_w=fp-224
554: (b7) r7 = 0 ; R7_w=0
; __builtin_memset(&params, 0, sizeof(params)); @ tproxy.c:1103
555: (7b) *(u64 *)(r10 -336) = r7 ; R7_w=0 R10=fp0 fp-336_w=0
556: (7b) *(u64 *)(r10 -344) = r7 ; R7_w=0 R10=fp0 fp-344_w=0
557: (7b) *(u64 *)(r10 -352) = r7 ; R7_w=0 R10=fp0 fp-352_w=0
558: (7b) *(u64 *)(r10 -360) = r7 ; R7_w=0 R10=fp0 fp-360_w=0
559: (7b) *(u64 *)(r10 -368) = r7 ; R7_w=0 R10=fp0 fp-368_w=0
560: (7b) *(u64 *)(r10 -376) = r7 ; R7_w=0 R10=fp0 fp-376_w=0
561: (7b) *(u64 *)(r10 -384) = r7 ; R7_w=0 R10=fp0 fp-384_w=0
562: (7b) *(u64 *)(r10 -392) = r7 ; R7_w=0 R10=fp0 fp-392_w=0
563: (7b) *(u64 *)(r10 -400) = r7 ; R7_w=0 R10=fp0 fp-400_w=0
; if (!(tcph.syn && !tcph.ack)) { @ tproxy.c:1105
564: (69) r1 = *(u16 *)(r1 +12) ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) fp-216=mmmmmmmm
565: (57) r1 &= 512 ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=512,var_off=(0x0; 0x200))
566: (15) if r1 == 0x0 goto pc-542 ; R1_w=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=512,var_off=(0x0; 0x200))
567: (b7) r2 = 1 ; R2_w=1
568: (bf) r1 = r10 ; R1_w=fp0 R10=fp0
569: (07) r1 += -224 ; R1_w=fp-224
570: (69) r3 = *(u16 *)(r1 +12) ; R1_w=fp-224 R3_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) fp-216=mmmmmmmm
571: (57) r3 &= 4096 ; R3=scalar(smin=smin32=0,smax=umax=smax32=umax32=4096,var_off=(0x0; 0x1000))
572: (15) if r3 == 0x0 goto pc-132 441: R0=scalar() R1=fp-224 R2=1 R3=0 R6=ctx() R7=0 R8=6 R9=scalar(id=3,smin=umin=smin32=umin32=14,smax=umax=smax32=umax32=74,var_off=(0x2; 0x7c)) R10=fp0 fp-112=????mmmm fp-120=??mmmmmm fp-128=mmmmmmmm fp-136=????mmmm fp-144=mmmmmmmm fp-152=mmmmmmmm fp-160=0 fp-168=0 fp-176=0 fp-184=0 fp-192=0 fp-200=0 fp-208=????mmmm fp-216=mmmmmmmm fp-224=mmmmmmmm fp-232=0 fp-240=mmmm???? fp-248=mmmmmmmscalar(id=4,smin=smin32=0,smax=umax=smax32=umax32=63,var_off=(0x0; 0x3f)) fp-256=mmmmmmmm fp-264=mmmmmmmm fp-272=0 fp-280=mmmmmmmm fp-288=0 fp-296=????0 fp-304=0 fp-312=0 fp-320=mmmmmmmm fp-328=mmmmmmmm fp-336=0 fp-344=0 fp-352=0 fp-360=0 fp-368=0 fp-376=0 fp-384=0 fp-392=0 fp-400=0
; @ tproxy.c:0
441: (63) *(u32 *)(r10 -400) = r2 ; R2=1 R10=fp0 fp-400=mmmm1
442: (7b) *(u64 *)(r10 -368) = r1 ; R1=fp-224 R10=fp0 fp-368_w=fp-224
; if (skb->protocol == bpf_htons(ETH_P_IP)) @ tproxy.c:1116
443: (61) r3 = *(u32 *)(r6 +16) ; R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R6=ctx()
444: (b7) r2 = 1 ; R2_w=1
445: (15) if r3 == 0x8 goto pc+1 ; R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
446: (b7) r2 = 2 ; R2=2
447: (bf) r3 = r10 ; R3_w=fp0 R10=fp0
; params.mac[2] = bpf_htonl((ethh.h_source[0] << 8) + (ethh.h_source[1])); @ tproxy.c:1121
448: (07) r3 += -128 ; R3_w=fp-128
449: (71) r4 = *(u8 *)(r3 +7) ; R3_w=fp-128 R4_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) fp-128=mmmmmmmm
450: (71) r5 = *(u8 *)(r3 +6) ; R3_w=fp-128 R5_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) fp-128=mmmmmmmm
451: (67) r5 <<= 8 ; R5_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xff00,var_off=(0x0; 0xff00))
452: (4f) r5 |= r4 ; R4_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) R5_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff))
; params.flag[6] = tuples.dscp; @ tproxy.c:1120
453: (71) r4 = *(u8 *)(r10 -248) ; R4_w=scalar(id=4,smin=smin32=0,smax=umax=smax32=umax32=63,var_off=(0x0; 0x3f)) R10=fp0 fp-248=mmmmmmmscalar(id=4,smin=smin32=0,smax=umax=smax32=umax32=63,var_off=(0x0; 0x3f))
454: (63) *(u32 *)(r10 -376) = r4 ; R4_w=scalar(id=4,smin=smin32=0,smax=umax=smax32=umax32=63,var_off=(0x0; 0x3f)) R10=fp0 fp-376=mmmmscalar(id=4,smin=smin32=0,smax=umax=smax32=umax32=63,var_off=(0x0; 0x3f))
; @ tproxy.c:0
455: (63) *(u32 *)(r10 -396) = r2 ; R2=2 R10=fp0 fp-400=mmmmmmmm
; params.mac[2] = bpf_htonl((ethh.h_source[0] << 8) + (ethh.h_source[1])); @ tproxy.c:1121
456: (dc) r5 = be32 r5 ; R5_w=scalar()
457: (63) *(u32 *)(r10 -336) = r5 ; R5_w=scalar() R10=fp0 fp-336=mmmmscalar()
; bpf_htonl((ethh.h_source[2] << 24) + (ethh.h_source[3] << 16) + @ tproxy.c:1123
458: (71) r2 = *(u8 *)(r3 +11) ; R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) R3_w=fp-128 fp-120=??mmmmmm
459: (71) r4 = *(u8 *)(r3 +10) ; R3_w=fp-128 R4_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) fp-120=??mmmmmm
460: (71) r5 = *(u8 *)(r3 +9) ; R3_w=fp-128 R5_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) fp-120=??mmmmmm
461: (71) r3 = *(u8 *)(r3 +8) ; R3_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) fp-120=??mmmmmm
462: (bf) r0 = r10 ; R0_w=fp0 R10=fp0
; params.mac[2] = bpf_htonl((ethh.h_source[0] << 8) + (ethh.h_source[1])); @ tproxy.c:1121
463: (07) r0 += -288 ; R0_w=fp-288
; params.saddr = tuples.five.sip.u6_addr32; @ tproxy.c:1125
464: (7b) *(u64 *)(r10 -360) = r0 ; R0_w=fp-288 R10=fp0 fp-360_w=fp-288
; bpf_htonl((ethh.h_source[2] << 24) + (ethh.h_source[3] << 16) + @ tproxy.c:1123
465: (67) r3 <<= 24 ; R3_w=scalar(smin=0,smax=umax=umax32=0xff000000,smax32=0x7f000000,var_off=(0x0; 0xff000000))
466: (67) r5 <<= 16 ; R5_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xff0000,var_off=(0x0; 0xff0000))
467: (4f) r5 |= r3 ; R3_w=scalar(smin=0,smax=umax=umax32=0xff000000,smax32=0x7f000000,var_off=(0x0; 0xff000000)) R5_w=scalar(smin=0,smax=umax=umax32=0xffff0000,smax32=0x7fff0000,var_off=(0x0; 0xffff0000))
468: (67) r4 <<= 8 ; R4_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xff00,var_off=(0x0; 0xff00))
469: (4f) r5 |= r4 ; R4_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xff00,var_off=(0x0; 0xff00)) R5_w=scalar(smin=0,smax=umax=umax32=0xffffff00,smax32=0x7fffff00,var_off=(0x0; 0xffffff00))
470: (4f) r5 |= r2 ; R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) R5_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
471: (dc) r5 = be32 r5 ; R5_w=scalar()
; params.mac[3] = @ tproxy.c:1122
472: (63) *(u32 *)(r10 -332) = r5 ; R5_w=scalar() R10=fp0 fp-336=mmmmmmmm
; params.daddr = tuples.five.dip.u6_addr32; @ tproxy.c:1126
473: (bf) r2 = r10 ; R2_w=fp0 R10=fp0
474: (07) r2 += -272 ; R2_w=fp-272
475: (7b) *(u64 *)(r10 -352) = r2 ; R2_w=fp-272 R10=fp0 fp-352_w=fp-272
476: (b7) r3 = 0 ; R3_w=0
; __builtin_memset(&ctx, 0, sizeof(ctx)); @ tproxy.c:871
477: (7b) *(u64 *)(r10 -32) = r3 ; R3_w=0 R10=fp0 fp-32_w=0
478: (7b) *(u64 *)(r10 -40) = r3 ; R3_w=0 R10=fp0 fp-40_w=0
479: (7b) *(u64 *)(r10 -48) = r3 ; R3_w=0 R10=fp0 fp-48_w=0
480: (7b) *(u64 *)(r10 -56) = r3 ; R3_w=0 R10=fp0 fp-56_w=0
481: (7b) *(u64 *)(r10 -64) = r3 ; R3_w=0 R10=fp0 fp-64_w=0
482: (7b) *(u64 *)(r10 -72) = r3 ; R3_w=0 R10=fp0 fp-72_w=0
483: (7b) *(u64 *)(r10 -80) = r3 ; R3_w=0 R10=fp0 fp-80_w=0
484: (7b) *(u64 *)(r10 -88) = r3 ; R3_w=0 R10=fp0 fp-88_w=0
485: (7b) *(u64 *)(r10 -104) = r3 ; R3_w=0 R10=fp0 fp-104_w=0
486: (b7) r4 = -8 ; R4_w=-8
; ctx.result = -ENOEXEC; @ tproxy.c:873
487: (7b) *(u64 *)(r10 -96) = r4 ; R4_w=-8 R10=fp0 fp-96_w=-8
488: (bf) r4 = r10 ; R4_w=fp0 R10=fp0
; params.mac[2] = bpf_htonl((ethh.h_source[0] << 8) + (ethh.h_source[1])); @ tproxy.c:1121
489: (07) r4 += -400 ; R4_w=fp-400
; ctx.params = params; @ tproxy.c:872
490: (7b) *(u64 *)(r10 -112) = r4 ; R4_w=fp-400 R10=fp0 fp-112_w=fp-400
; if (_l4proto_type == L4ProtoType_TCP) { @ tproxy.c:876
491: (55) if r8 != 0x6 goto pc+82 ; R8=6
492: (b7) r5 = 0 ; R5_w=0
493: (bf) r4 = r1 ; R1=fp-224 R4_w=fp-224
494: (0f) r4 += r5 ; R4_w=fp-224 R5_w=0
; ctx.h_dport = bpf_ntohs(((struct tcphdr *)params->l4hdr)->dest); @ tproxy.c:877
495: (69) r1 = *(u16 *)(r1 +2) ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) fp-224=mmmmmmmm
496: (05) goto pc+82
; @ tproxy.c:0
579: (dc) r1 = be16 r1 ; R1_w=scalar()
580: (6b) *(u16 *)(r10 -104) = r1 ; R1_w=scalar() R10=fp0 fp-104=mmmmmmscalar()
; (ctx.h_dport == 53 && _l4proto_type == L4ProtoType_UDP) << 3; @ tproxy.c:890
581: (15) if r1 == 0x35 goto pc+1 ; R1_w=scalar()
582: (b7) r3 = 0 ; R3_w=0
; @ tproxy.c:0
583: (69) r1 = *(u16 *)(r4 +0) ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) R4=fp-224 fp-224=mmmmmmmm
; ctx.isdns_must_goodsubrule_badrule = @ tproxy.c:889
584: (73) *(u8 *)(r10 -28) = r3 ; R3_w=0 R10=fp0 fp-32=mmm0mmmm
; @ tproxy.c:0
585: (dc) r1 = be16 r1 ; R1_w=scalar()
586: (6b) *(u16 *)(r10 -102) = r1 ; R1_w=scalar() R10=fp0 fp-104=mmmmmmmm
; instruction poisoned by CO-RE @ :0
587: (85) call unknown#195896080
invalid func unknown#195896080
processed 247 insns (limit 1000000) max_states_per_insn 0 total_states 14 peak_states 14 mark_read 12 |
67e06fb
to
4c4159b
Compare
确认正常工作 🎉🎉🎉 |
What does this significant increase in startup speed refer to? commit be093c (main branch):
this patch:
It seems to be all 3s? The time it takes for DAE to start up and be able to route traffic does need optimization, but it seems that this patch doesn't show it. Am I doing something wrong? |
跑上了,用上香港节点,比其他西方记者跑得还快 |
@douglarek 或许你的设备性能比较好,瓶颈不在这部分。我们收到了一些报告,有一些低端设备载入时间超过30秒,有一个相关的PR专门用来解决这个问题 #510 |
Okay, I'll test my r5c oil leak detector tonight. 😃 update Still couldn't find any difference: commit be093c (main branch):
this patch:
They're all about 15 seconds, not a significant improvement. |
edited: 现在全部正常 |
@douglarek 感谢测试!我会修改我的描述 |
Tested on my three machines benchmark |
ImmortalWrt Snapshot |
测试e843975,确认已经可以识别bpf_loop了,不再强制检测内核版本了,换句话说任意内核只要bp了这个特性就能润起来了。 |
@zzzsyyy 感谢你的测试,从结果上来说很不错。可以描述一下你的测试方法吗? |
What is measured is only the time between the log |
定位到了我这边x86_64 proxy异常的原因:是clang版本,使用clang15编译的dae一切正常,使用clang18的就直连可以,proxy不行 update:cgo enable, openwrt with kernel 5.15 with bpf_loop bp, musl |
我18没问题!😅😅😅 |
@mzz2017 恭喜。另外也许可以考虑禁止 |
我用 BPF_PROG_TEST_RUN 运行 5,000,000 次 wan_egress,在只有一个规则的情况下 ( 测试代码在 jschwinger233#22 |
@jschwinger233 听起来非常不错,可以测试一下更多规则的情况吗,例如之前在有 50 条 port 规则时会出现明显的延时,例如 ping 可能会增加数十延时? |
50 个 dport 规则 + fallback
测试的 skb dport 79 会遍历全部规则最后命中 fallback,main 的 wan_egress 运行五百万遍耗时 8.23037418s,此 PR 耗时 4.077763339s,提升 101.83% 之后我把 bpf 测试集成到 ci 吧 |
8739815
to
840d97d
Compare
Background
过去,route 函数总是指令数限制的重灾区,由于它过于复杂,容易使得指令数超限,许多有趣的功能无法扩展,bpf 的写法也需要经常和 verifier 斗智斗勇。
本次 PR 使用 bpf_loop helper 重写 bpf route 部分,
带来如下几个好处:
带来的问题:
TODO:benchmark
Checklist
Full Changelogs
Issue Reference
Closes #[issue number]
Test Result