본문 바로가기

Skills/mY Technutz

Guest VM 의 rx overflow issue vmcore 분석

언제나처럼 vmcore 분석인데... 다소 간단하면서도 찾아내기 힘든 케이스였다.

vmcore 분석에 대한 Article 을 더 이상 게재하지 않기로 했으나, 과정 기억을 위해 기록.

Xen Guest VM 의 UEK5 의 경우 메모리배치의 차이로 인해 vmcore 를 읽을 때 다소 우회적인 방법이 필요하다 :


$ crash7latest --machdep phys_base=7c5000000 --kaslr=0x25000000 vmlinux 2021-1220-0535.06-0004fb0000060000f9bc0548d10925c3.26.core

      KERNEL: /share/linuxrpm/vmlinux_repo/64/4.14.35-1902.5.2.el7uek.x86_64/vmlinux
    DUMPFILE: ovhpasdbo1_dump.tgz_extract/0004fb0000060000f9bc0548d10925c3/2021-1220-0535.06-0004fb0000060000f9bc0548d10925c3.26.core
        CPUS: 24 [OFFLINE: 23]
        DATE: Mon Dec 20 05:35:06 KST 2021
      UPTIME: 41 days, 10:55:23
LOAD AVERAGE: 1.48, 1.18, 0.82
       TASKS: 1360
    NODENAME: ovhpasdbo1
     RELEASE: 4.14.35-1902.5.2.el7uek.x86_64
     VERSION: #2 SMP Wed Aug 28 21:39:33 GMT 2019
     MACHINE: x86_64  (2095 Mhz)
      MEMORY: 32 GB
       PANIC: "BUG: unable to handle kernel NULL pointer dereference at 000000000000002c"
         PID: 0
     COMMAND: "swapper/0"
        TASK: ffffffffa7412480  (1 of 24)  [THREAD_INFO: ffffffffa7412480]
         CPU: 0
       STATE: TASK_RUNNING (PANIC)

crash7latest> extend /home/mirrlee/mpykdump64.so
Setting scroll off while initializing PyKdump
/home/mirrlee/mpykdump64.so: shared object loaded
crash7latest> epython /home/mirrlee/pycrashext-master/regext.py

 ** Execution took   0.07s (real)   0.07s (CPU)

로그 확인

crash7latest> log
[669787.195295] hrtimer: interrupt took 19408 ns
[3581719.424780] BUG: unable to handle kernel NULL pointer dereference at 000000000000002c
[3581719.424852] IP: xennet_poll+0x2bb/0xb30 [xen_netfront]
[3581719.424911] PGD 0 P4D 0 
[3581719.424931] Oops: 0000 [#1] SMP PTI
[3581719.424955] Modules linked in: rds oracleacfs(PO) oracleadvm(PO) oracleoks(PO) ovmapi sunrpc dm_mod xfs libcrc32c ext4 mbcache jbd2 fscrypto ppdev crct10dif_pclmul crc32_pclmul ghash_clmulni_intel cirrus pcbc ttm xen_fbfront drm_kms_helper drm parport_pc aesni_intel crypto_simd glue_helper syscopyarea sysfillrect sysimgblt cryptd fb_sys_fops pcspkr parport i2c_piix4 binfmt_misc ip_tables btrfs xor zstd_decompress zstd_compress xxhash raid6_pq ata_generic pata_acpi xen_blkfront xen_netfront ata_piix libata crc32c_intel serio_raw floppy
[3581719.425262] CPU: 0 PID: 0 Comm: swapper/0 Tainted: P           O    4.14.35-1902.5.2.el7uek.x86_64 #2
[3581719.425316] Hardware name: Xen HVM domU, BIOS 4.4.4OVM 08/14/2017
[3581719.425352] task: ffffffffa7412480 task.stack: ffffffffa7400000
[3581719.425390] RIP: 0010:xennet_poll+0x2bb/0xb30 [xen_netfront]
[3581719.425425] RSP: 0018:ffff98c029003dd8 EFLAGS: 00010287
[3581719.425458] RAX: 0000000000000142 RBX: 0000000000000000 RCX: ffff98c025b5a900
[3581719.425501] RDX: 0000000000000000 RSI: ffff98c023924d20 RDI: 00000000ffffffff
[3581719.427135] RBP: ffff98c029003ed8 R08: ffff98c023924528 R09: 0000000000000000
[3581719.428757] R10: 0000000000000000 R11: 000000000000008c R12: ffff98c029003e80
[3581719.430382] R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000
[3581719.431998] FS:  0000000000000000(0000) GS:ffff98c029000000(0000) knlGS:0000000000000000
[3581719.433625] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[3581719.435243] CR2: 000000000000002c CR3: 00000007ec40a002 CR4: 00000000001606f0
[3581719.436880] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[3581719.438505] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[3581719.440088] Call Trace:
[3581719.441640]  <IRQ>
[3581719.443172]  ? __slab_free+0x9b/0x2ba
[3581719.444690]  net_rx_action+0x289/0x3f4
[3581719.446165]  __do_softirq+0xd9/0x28d
[3581719.447603]  irq_exit+0xdf/0xe5
[3581719.449037]  xen_evtchn_do_upcall+0x30/0x3b
[3581719.450439]  xen_hvm_callback_vector+0x1c2/0x1c7
[3581719.451801]  </IRQ>
[3581719.453159] RIP: 0010:native_safe_halt+0x12/0x14
[3581719.454526] RSP: 0018:ffffffffa7403e18 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff0c
[3581719.455922] RAX: ffffffffa6869e10 RBX: ffffffffa7412480 RCX: 0000000000000000
[3581719.457343] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[3581719.458735] RBP: ffffffffa7403e18 R08: 00000000f46135a0 R09: 0000000000000000
[3581719.460086] R10: 0000000000000216 R11: 00000000000012c0 R12: 0000000000000000
[3581719.461400] R13: ffffffffa7412480 R14: 0000000000000000 R15: 0000000000000000
[3581719.462723]  ? __sched_text_end+0x5/0x0
[3581719.464039]  default_idle+0x1e/0xfc
[3581719.465351]  arch_cpu_idle+0x15/0x17
[3581719.466636]  default_idle_call+0x23/0x31
[3581719.467902]  do_idle+0x158/0x1d5
[3581719.469140]  cpu_startup_entry+0x73/0x75
[3581719.470393]  rest_init+0xaa/0xac
[3581719.471639]  start_kernel+0x540/0x561
[3581719.472858]  ? set_init_arg+0x5a/0x5a
[3581719.474054]  x86_64_start_reservations+0x32/0x34
[3581719.475253]  x86_64_start_kernel+0x74/0x77
[3581719.476451]  secondary_startup_64+0xa5/0xa5
[3581719.477615] Code: b4 1d 00 00 00 00 00 00 41 0f b7 44 24 06 66 85 c0 0f 88 e3 06 00 00 41 0f b7 54 24 02 98 01 d0 3d 00 10 00 00 0f 87 cf 06 00 00 <8b> 7b 2c 85 ff 0f 85 4f ff ff ff 45 85 f6 4c 89 85 48 ff ff ff 
[3581719.479997] RIP: xennet_poll+0x2bb/0xb30 [xen_netfront] RSP: ffff98c029003dd8
[3581719.481132] CR2: 000000000000002c
[3581719.482286] ---[ end trace 33683746988514c8 ]---
[3581719.483360] Kernel panic - not syncing: Fatal exception in interrupt
[3581719.484594] Kernel Offset: 0x25000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)

Backtrace :

crash7latest> bt -l
PID: 0      TASK: ffffffffa7412480  CPU: 0   COMMAND: "swapper/0"
 #0 [ffffffffa7403db0] __schedule at ffffffffa68648cc
    /usr/src/debug/kernel-4.14.35/linux-4.14.35-1902.5.2.el7uek/kernel/sched/core.c: 2838
 #1 [ffffffffa7403de8] native_safe_halt at ffffffffa686a1c2
    /usr/src/debug/kernel-4.14.35/linux-4.14.35-1902.5.2.el7uek/arch/x86/include/asm/irqflags.h: 61
 #2 [ffffffffa7403e20] default_idle at ffffffffa6869e2e
    /usr/src/debug/kernel-4.14.35/linux-4.14.35-1902.5.2.el7uek/arch/x86/include/asm/paravirt.h: 94
 #3 [ffffffffa7403e40] arch_cpu_idle at ffffffffa603a2a5
    /usr/src/debug/kernel-4.14.35/linux-4.14.35-1902.5.2.el7uek/arch/x86/kernel/process.c: 559
 #4 [ffffffffa7403e50] default_idle_call at ffffffffa686a363
    /usr/src/debug/kernel-4.14.35/linux-4.14.35-1902.5.2.el7uek/kernel/sched/idle.c: 101
 #5 [ffffffffa7403e60] do_idle at ffffffffa60e0ee8
    /usr/src/debug/kernel-4.14.35/linux-4.14.35-1902.5.2.el7uek/kernel/sched/idle.c: 156
 #6 [ffffffffa7403e98] cpu_startup_entry at ffffffffa60e1163
    /usr/src/debug/kernel-4.14.35/linux-4.14.35-1902.5.2.el7uek/kernel/sched/idle.c: 351
 #7 [ffffffffa7403ec0] rest_init at ffffffffa685b65a
    /usr/src/debug/kernel-4.14.35/linux-4.14.35-1902.5.2.el7uek/init/main.c: 444
 #8 [ffffffffa7403ed0] start_kernel at ffffffffa768a2c0
    /usr/src/debug/kernel-4.14.35/linux-4.14.35-1902.5.2.el7uek/init/main.c: 723
 #9 [ffffffffa7403f28] x86_64_start_reservations at ffffffffa768966b
    /usr/src/debug/kernel-4.14.35/linux-4.14.35-1902.5.2.el7uek/arch/x86/kernel/head64.c: 395
#10 [ffffffffa7403f38] x86_64_start_kernel at ffffffffa76896e1
    /usr/src/debug/kernel-4.14.35/linux-4.14.35-1902.5.2.el7uek/arch/x86/kernel/head64.c: 376
#11 [ffffffffa7403f50] secondary_startup_64 at ffffffffa60000d5
    /usr/src/debug/kernel-4.14.35/linux-4.14.35-1902.5.2.el7uek/arch/x86/kernel/head_64.S: 239

Xen Guest VM 이므로 CPU0 에 대한 stack backtrace :

crash7latest> bt -E -c 0
CPU 0 IRQ STACK:

  KERNEL-MODE EXCEPTION FRAME AT: ffff98c029003d28
    [exception RIP: __dta_xennet_poll_55+699]
    RIP: ffffffffc01cf5cb  RSP: ffff98c029003dd8  RFLAGS: 00010287
    RAX: 0000000000000142  RBX: 0000000000000000  RCX: ffff98c025b5a900
    RDX: 0000000000000000  RSI: ffff98c023924d20  RDI: 00000000ffffffff
    RBP: ffff98c029003ed8   R8: ffff98c023924528   R9: 0000000000000000
    R10: 0000000000000000  R11: 000000000000008c  R12: ffff98c029003e80
    R13: 0000000000000001  R14: 0000000000000000  R15: 0000000000000000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018

CPU 0 DOUBLEFAULT EXCEPTION STACK:
(none found)

CPU 0 NMI EXCEPTION STACK:
(none found)

CPU 0 DEBUG EXCEPTION STACK:
(none found)

CPU 0 MCE EXCEPTION STACK:
(none found)

RIP 에 대한 Disassemble :

crash7latest> edis -rgl __dta_xennet_poll_55+699
+-------*0xffffffffc01cf412 <__dta_xennet_poll_55+258>:	jge    0xffffffffc01cf890 <__dta_xennet_poll_55+1408>
|        0xffffffffc01cf418 <__dta_xennet_poll_55+264>:	cmp    -0xdc(%rbp),%r12d
|+------*0xffffffffc01cf41f <__dta_xennet_poll_55+271>:	je     0xffffffffc01cf890 <__dta_xennet_poll_55+1408>
||       0xffffffffc01cf425 <__dta_xennet_poll_55+277>:	mov    0x1568(%r15),%eax
||       0xffffffffc01cf42c <__dta_xennet_poll_55+284>:	mov    -0x8(%r15),%rcx
||       0xffffffffc01cf430 <__dta_xennet_poll_55+288>:	xor    %edi,%edi
||       0xffffffffc01cf432 <__dta_xennet_poll_55+290>:	movl   $0x0,-0xc0(%rbp)
||       0xffffffffc01cf43c <__dta_xennet_poll_55+300>:	sub    $0x1,%eax
||       0xffffffffc01cf43f <__dta_xennet_poll_55+303>:	and    %eax,%r12d
||       0xffffffffc01cf442 <__dta_xennet_poll_55+306>:	mov    0x1570(%r15),%rax
||       0xffffffffc01cf449 <__dta_xennet_poll_55+313>:	mov    0x40(%rax,%r12,8),%rdx
||       0xffffffffc01cf44e <__dta_xennet_poll_55+318>:	lea    -0x58(%rbp),%rax
||       0xffffffffc01cf452 <__dta_xennet_poll_55+322>:	movq   $0x0,-0x50(%rbp)
||       0xffffffffc01cf45a <__dta_xennet_poll_55+330>:	lea    0x8(%rax),%r14
||       0xffffffffc01cf45e <__dta_xennet_poll_55+334>:	movq   $0x0,0x8(%r14)
||       0xffffffffc01cf466 <__dta_xennet_poll_55+342>:	movq   $0x0,0x10(%r14)
||       0xffffffffc01cf46e <__dta_xennet_poll_55+350>:	movq   $0x0,0x18(%r14)
||       0xffffffffc01cf476 <__dta_xennet_poll_55+358>:	mov    0x10(%rcx),%rax
||       0xffffffffc01cf47a <__dta_xennet_poll_55+362>:	mov    %rdx,-0x58(%rbp)
||       0xffffffffc01cf47e <__dta_xennet_poll_55+366>:	add    $0x508,%rax
||       0xffffffffc01cf484 <__dta_xennet_poll_55+372>:	mov    %rax,-0xd8(%rbp)
||       0xffffffffc01cf48b <__dta_xennet_poll_55+379>:	mov    0x1564(%r15),%eax
||       0xffffffffc01cf492 <__dta_xennet_poll_55+386>:	mov    %eax,-0xc8(%rbp)
||       0xffffffffc01cf498 <__dta_xennet_poll_55+392>:	movzbl %al,%eax
||       0xffffffffc01cf49b <__dta_xennet_poll_55+395>:	lea    (%r15,%rax,8),%rsi
||       0xffffffffc01cf49f <__dta_xennet_poll_55+399>:	lea    (%r15,%rax,4),%rax
||       0xffffffffc01cf4a3 <__dta_xennet_poll_55+403>:	mov    0x15b0(%rsi),%rbx
||       0xffffffffc01cf4aa <__dta_xennet_poll_55+410>:	movq   $0x0,0x15b0(%rsi)
||       0xffffffffc01cf4b5 <__dta_xennet_poll_55+421>:	mov    0x1db4(%rax),%r10d
||       0xffffffffc01cf4bc <__dta_xennet_poll_55+428>:	movl   $0x0,0x1db4(%rax)
||       0xffffffffc01cf4c6 <__dta_xennet_poll_55+438>:	mov    %rdx,%rax
||       0xffffffffc01cf4c9 <__dta_xennet_poll_55+441>:	shr    $0x30,%rax
||       0xffffffffc01cf4cd <__dta_xennet_poll_55+445>:	cmp    $0x101,%ax
||       0xffffffffc01cf4d1 <__dta_xennet_poll_55+449>:	setl   %dil
||       0xffffffffc01cf4d5 <__dta_xennet_poll_55+453>:	shr    $0x20,%rdx
||       0xffffffffc01cf4d9 <__dta_xennet_poll_55+457>:	mov    %edi,-0xc4(%rbp)
||       0xffffffffc01cf4df <__dta_xennet_poll_55+463>:	addl   $0x12,-0xc4(%rbp)
||       0xffffffffc01cf4e6 <__dta_xennet_poll_55+470>:	and    $0x8,%edx
||+-----*0xffffffffc01cf4e9 <__dta_xennet_poll_55+473>:	jne    0xffffffffc01cfa04 <__dta_xennet_poll_55+1780>
|||      0xffffffffc01cf4ef <__dta_xennet_poll_55+479>:	mov    -0xc8(%rbp),%edi
|||      0xffffffffc01cf4f5 <__dta_xennet_poll_55+485>:	mov    -0xdc(%rbp),%r13d
|||      0xffffffffc01cf4fc <__dta_xennet_poll_55+492>:	mov    $0x1,%r14d
|||      0xffffffffc01cf502 <__dta_xennet_poll_55+498>:	mov    %r15,%r8
|||      0xffffffffc01cf505 <__dta_xennet_poll_55+501>:	lea    -0x58(%rbp),%r12
|||      0xffffffffc01cf509 <__dta_xennet_poll_55+505>:	lea    0x1(%rdi),%r9d
|||      0xffffffffc01cf50d <__dta_xennet_poll_55+509>:	sub    %edi,%r13d
|||      0xffffffffc01cf510 <__dta_xennet_poll_55+512>:	mov    %r13d,-0xa8(%rbp)
|||      0xffffffffc01cf517 <__dta_xennet_poll_55+519>:	mov    %r14d,%r13d
|||      0xffffffffc01cf51a <__dta_xennet_poll_55+522>:	mov    %r10d,%r14d
|||      0xffffffffc01cf51d <__dta_xennet_poll_55+525>:	mov    %r9d,%r15d
|||+----*0xffffffffc01cf520 <__dta_xennet_poll_55+528>:	jmpq   0xffffffffc01cf5ae <__dta_xennet_poll_55+670>
||||     0xffffffffc01cf525 <__dta_xennet_poll_55+533>:	mov    -0x68(%rbp),%rax
||||     0xffffffffc01cf529 <__dta_xennet_poll_55+537>:	lea    -0x70(%rbp),%rcx
||||     0xffffffffc01cf52d <__dta_xennet_poll_55+541>:	mov    %rcx,(%rbx)
||||     0xffffffffc01cf530 <__dta_xennet_poll_55+544>:	mov    %rax,0x8(%rbx)
||||     0xffffffffc01cf534 <__dta_xennet_poll_55+548>:	mov    %rbx,(%rax)
||||     0xffffffffc01cf537 <__dta_xennet_poll_55+551>:	addl   $0x1,-0x60(%rbp)
||||     0xffffffffc01cf53b <__dta_xennet_poll_55+555>:	mov    %rbx,-0x68(%rbp)
||||     0xffffffffc01cf53f <__dta_xennet_poll_55+559>:	testb  $0x4,0x4(%r12)
||||+---*0xffffffffc01cf545 <__dta_xennet_poll_55+565>:	je     0xffffffffc01cf631 <__dta_xennet_poll_55+801>
|||||    0xffffffffc01cf54b <__dta_xennet_poll_55+571>:	cmp    -0xa8(%rbp),%r13d
|||||+--*0xffffffffc01cf552 <__dta_xennet_poll_55+578>:	je     0xffffffffc01cf9b0 <__dta_xennet_poll_55+1696>
||||||   0xffffffffc01cf558 <__dta_xennet_poll_55+584>:	mov    0x1568(%r8),%eax
||||||   0xffffffffc01cf55f <__dta_xennet_poll_55+591>:	add    $0x1,%r13d
||||||   0xffffffffc01cf563 <__dta_xennet_poll_55+595>:	lea    -0x1(%rax),%edx
||||||   0xffffffffc01cf566 <__dta_xennet_poll_55+598>:	mov    0x1570(%r8),%rax
||||||   0xffffffffc01cf56d <__dta_xennet_poll_55+605>:	and    %r15d,%edx
||||||   0xffffffffc01cf570 <__dta_xennet_poll_55+608>:	lea    0x40(%rax,%rdx,8),%r12
||||||   0xffffffffc01cf575 <__dta_xennet_poll_55+613>:	movzbl %r15b,%eax
||||||   0xffffffffc01cf579 <__dta_xennet_poll_55+617>:	add    $0x1,%r15d
||||||   0xffffffffc01cf57d <__dta_xennet_poll_55+621>:	lea    (%r8,%rax,8),%rdx
||||||   0xffffffffc01cf581 <__dta_xennet_poll_55+625>:	lea    (%r8,%rax,4),%rax
||||||   0xffffffffc01cf585 <__dta_xennet_poll_55+629>:	mov    0x15b0(%rdx),%rbx
||||||   0xffffffffc01cf58c <__dta_xennet_poll_55+636>:	movq   $0x0,0x15b0(%rdx)
||||||   0xffffffffc01cf597 <__dta_xennet_poll_55+647>:	mov    0x1db4(%rax),%r14d
||||||   0xffffffffc01cf59e <__dta_xennet_poll_55+654>:	movl   $0x0,0x1db4(%rax)
||||||   0xffffffffc01cf5a8 <__dta_xennet_poll_55+664>:	movzwl 0x6(%r12),%eax
|||+====>0xffffffffc01cf5ae <__dta_xennet_poll_55+670>:	test   %ax,%ax
||| ||+-*0xffffffffc01cf5b1 <__dta_xennet_poll_55+673>:	js     0xffffffffc01cfc9a <__dta_xennet_poll_55+2442>
||| |||  0xffffffffc01cf5b7 <__dta_xennet_poll_55+679>:	movzwl 0x2(%r12),%edx
||| |||  0xffffffffc01cf5bd <__dta_xennet_poll_55+685>:	cwtl   
||| |||  0xffffffffc01cf5be <__dta_xennet_poll_55+686>:	add    %edx,%eax
||| |||  0xffffffffc01cf5c0 <__dta_xennet_poll_55+688>:	cmp    $0x1000,%eax
||| |||+*0xffffffffc01cf5c5 <__dta_xennet_poll_55+693>:	ja     0xffffffffc01cfc9a <__dta_xennet_poll_55+2442>
||| |||| 0xffffffffc01cf5cb <__dta_xennet_poll_55+699>:	mov    0x2c(%rbx),%edi   << Trapped

Trapped 로 표시된 부분의 코드 확인 :

static int xennet_get_responses(struct netfront_queue *queue,
                 struct netfront_rx_info *rinfo, RING_IDX rp,
                 struct sk_buff_head *list)
 {
     struct xen_netif_rx_response *rx = &rinfo->rx;
     struct xen_netif_extra_info *extras = rinfo->extras;
     struct device *dev = &queue->info->netdev->dev;
     RING_IDX cons = queue->rx.rsp_cons;
 .. Snip ..
 
         if (NETFRONT_SKB_CB(skb)->ref != GRANT_INVALID_REF) { <---
             __skb_queue_tail(list, skb);
             goto next;
         }

Xen 의 netfront 부분을 확인해야 하므로 netfront 모듈 install :

crash7latest> mod -s xen_netfront /home/mirrlee/usr/lib/debug/lib/modules/4.14.35-1902.5.2.el7uek.x86_64/kernel/drivers/net/xen-netfront.ko.debug
     MODULE       NAME                       BASE           SIZE  OBJECT FILE
ffffffffc01d43c0  xen_netfront         ffffffffc01cd000    36864  /home/mirrlee/usr/lib/debug/lib/modules/4.14.35-1902.5.2.el7uek.x86_64/kernel/drivers/net/xen-netfront.ko.debug

RSP : ffff98c029003dd8 에 대한 NAPI 와 netfront_queue 상태 추적 ( struct sk_buff 에 대한 member 단위 역추적 ) :

crash7latest> edis -rgl __dta_xennet_poll_55+699         
         0xffffffffc01cf310 <xennet_poll>:	nopl   0x0(%rax,%rax,1) [FTRACE NOP]
         0xffffffffc01cf315 <__dta_xennet_poll_55+5>:	push   %rbp
         0xffffffffc01cf316 <__dta_xennet_poll_55+6>:	mov    %rsp,%rbp
         0xffffffffc01cf319 <__dta_xennet_poll_55+9>:	push   %r15
         0xffffffffc01cf31b <__dta_xennet_poll_55+11>:	mov    %rdi,%r15
         0xffffffffc01cf31e <__dta_xennet_poll_55+14>:	push   %r14
         0xffffffffc01cf320 <__dta_xennet_poll_55+16>:	push   %r13
         0xffffffffc01cf322 <__dta_xennet_poll_55+18>:	push   %r12
         0xffffffffc01cf324 <__dta_xennet_poll_55+20>:	push   %rbx
         0xffffffffc01cf325 <__dta_xennet_poll_55+21>:	sub    $0xd8,%rsp    <<<

crash7latest> eval ffff98c029003dd8 + 0xd8
hexadecimal: ffff98c029003eb0  
    decimal: 18446630549821865648  (-113523887685968)
      octal: 1777774614005100037260
     binary: 1111111111111111100110001100000000101001000000000011111010110000
crash7latest> eval ffff98c029003eb0+0x18
hexadecimal: ffff98c029003ec8  
    decimal: 18446630549821865672  (-113523887685944)
      octal: 1777774614005100037310
     binary: 1111111111111111100110001100000000101001000000000011111011001000
crash7latest> rd ffff98c029003ec8
ffff98c029003ec8:  ffff98c023924528                    (E.#....
crash7latest> struct napi_struct ffff98c023924528|more
struct napi_struct {
  poll_list = {
    next = 0xffff98c023924528, 
    prev = 0xffff98c023924528
  }, 
  state = 17, 
  weight = 64, 
  gro_count = 0, 
  poll = 0xffffffffc01cf310, 
  poll_owner = -1, 
  dev = 0xffff98c025b5a000, 
  gro_list = 0x0,
  
  ... snip ...
    rx = {
    req_prod_pvt = 255, 
    rsp_cons = 4294967295, 
    nr_ents = 256, 
    sring = 0xffff98c005f1e000
  },

xen_netif_rx_front_ring 체크 :

crash7latest> struct net_device 0xffff98c025b5a000|more
struct net_device {
  name = "eth1\000\000\000\000\000\000\000\000\000\000\000", 
  name_hlist = {
    next = 0x0, 
    pprev = 0xffff98c029b35158
  }, 
  ifalias = 0x0, 
  mem_end = 0, 

 [[[ Snip ]]]

  stats = {
    rx_packets = 0, 
    tx_packets = 0, 
    rx_bytes = 0, 
    tx_bytes = 0, 
    rx_errors = 1,                          <<<<< RX error

몇 차례 반복되어 생성된 여러 개의 vmcore 분석을 통해 Ring buffer 의 index 가 255 에 도달할 경우 Overflow 발생 확인 :

crash7latest> xen_netif_rx_front_ring 0xffff98c005f1e000
struct xen_netif_rx_front_ring {
  req_prod_pvt = 255, 
  rsp_cons = 1, 
  nr_ents = 193, 
  sring = 0x0
}

유사케이스 확인 :

https://lists.xenproject.org/archives/html/xen-devel/2019-08/msg01771.html

The code is fixed in kernel version 4.14.35-1902.7.0 and later.