흥미로운 버그를 발견했다. 물론 이미 알려진 버그인데,
분석하는 과정을 개인적으로 업데이트하고 공유한다.
이슈는 엑사데이타에서 사용하는 UEK2 (2.6.39-400.264.1) 커널의 crash 이다.
GNU gdb (GDB) 7.6
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-unknown-linux-gnu"...
KERNEL: /share/linuxrpm/vmlinux_repo/64/2.6.39-400.264.1.el6uek.x86_64/vmlinux
DUMPFILE: vmcore [PARTIAL DUMP]
CPUS: 36
DATE: Thu Mar 30 08:42:38 2017
UPTIME: 427 days, 16:24:58
LOAD AVERAGE: 1.64, 1.51, 1.50
TASKS: 1300
NODENAME: o***dev02.*****.com
RELEASE: 2.6.39-400.264.1.el6uek.x86_64
VERSION: #1 SMP Wed Aug 26 16:42:25 PDT 2015
MACHINE: x86_64 (2294 Mhz)
MEMORY: 255.9 GB
PANIC: "kernel BUG at mm/slab.c:512!"
PID: 11003
COMMAND: "kworker/8:2"
TASK: ffff8803e315c200 [THREAD_INFO: ffff88045f3a8000]
CPU: 8
STATE: TASK_RUNNING (PANIC)
노드(호스트) 정보는 보안정책상 제거한다.
패닉 이유로 버그라고 표시된 것을 볼 수 있다. 백트레이스를 일단 해보자.
crash64> bt -l
PID: 11003 TASK: ffff8803e315c200 CPU: 8 COMMAND: "kworker/8:2"
#0 [ffff88045f3a99a0] machine_kexec at ffffffff8103abf9
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/arch/x86/kernel/machine_kexec_64.c: 339
#1 [ffff88045f3a9a10] crash_kexec at ffffffff810b96e3
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/kernel/kexec.c: 1140
#2 [ffff88045f3a9ae0] oops_end at ffffffff8150f518
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/arch/x86/kernel/dumpstack.c: 228
#3 [ffff88045f3a9b10] die at ffffffff8101878b
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/arch/x86/kernel/dumpstack.c: 306
#4 [ffff88045f3a9b40] do_trap at ffffffff8150f0a4
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/arch/x86/kernel/traps.c: 169
#5 [ffff88045f3a9ba0] do_invalid_op at ffffffff810166e5
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/arch/x86/kernel/traps.c: 210
#6 [ffff88045f3a9c40] invalid_op at ffffffff815176fb
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/arch/x86/kernel/entry_64.S: 1117
[exception RIP: free_block+362]
RIP: ffffffff8115cd4a RSP: ffff88045f3a9cf0 RFLAGS: 00010046
RAX: ffffea000d89f490 RBX: ffff883fbf050d40 RCX: 000000000000100c
RDX: 0020000000000000 RSI: ffff881fbdbb9898 RDI: ffff8803de45e0c0
RBP: ffff88045f3a9d40 R8: 0000000000000000 R9: 0000000000000000
R10: 0000000000000001 R11: ffff8803e23d1788 R12: 0000000000000001
R13: ffff881fbdbb9898 R14: 0000000000000000 R15: ffffea0000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/mm/slab.c: 512
#7 [ffff88045f3a9d48] drain_array at ffffffff8115cfa1
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/mm/slab.c: 4072
#8 [ffff88045f3a9d98] cache_reap at ffffffff8115d7fe
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/mm/slab.c: 4121
#9 [ffff88045f3a9df8] process_one_work at ffffffff8108a7c6
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/arch/x86/include/asm/atomic.h: 25
#10 [ffff88045f3a9e58] worker_thread at ffffffff8108bda0
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/include/linux/list.h: 188
#11 [ffff88045f3a9ee8] kthread at ffffffff81090fe6
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/kernel/kthread.c: 96
#12 [ffff88045f3a9f48] kernel_thread_helper at ffffffff81517884
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/arch/x86/kernel/entry_64.S: 1166
중요한 부분은 볼드처리 했다. 일단 free_block 에 대해서 문제가 있는걸 보아,
더블프리버그가 아닐까 살짝 의심해 본다. 더 살펴보도록 하자.
crash64> dis -rl ffffffff8115cd4a| tail -n 10
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/include/linux/list.h: 76
0xffffffff8115cd30 <free_block+336>: mov 0x8(%rdx),%rsi
0xffffffff8115cd34 <free_block+340>: mov %rax,%rdi
0xffffffff8115cd37 <free_block+343>: callq 0xffffffff81268720 <__list_add>
0xffffffff8115cd3c <free_block+348>: jmpq 0xffffffff8115cc40 <free_block+96>
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/include/linux/mm.h: 354
0xffffffff8115cd41 <free_block+353>: mov 0x10(%rax),%rax
0xffffffff8115cd45 <free_block+357>: jmpq 0xffffffff8115cc83 <free_block+163>
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/mm/slab.c: 512
0xffffffff8115cd4a <free_block+362>: ud2
crash64> p 0xffffffff8115cc83
$1 = 18446744071580273795
crash64> slab.inuse
struct slab {
[32] unsigned int inuse;
}
crash64> slab.inuse 0xffffffff8115cc83
inuse = 3903870345
crash64> slab.inuse 0xffffffff8115cc40
inuse = 3904981247
crash64> slab.inuse ffffea000d89f490
inuse = 6
crash64> kmem_cache.num ffffea000d89f490
num = 13
crash64> kmem_cache.num 0xffffffff8115cc40
num = 1096119515
crash64> kmem_cache.num ffff883fbf050d40
num = 3
crash64> dis ffffffff8115cd4a
0xffffffff8115cd4a <free_block+362>: ud2
page lru 를 확인해 보자.
crash64> page.lru
struct page {
[40] struct list_head lru;
}
crash64> page.lru ffffffff8115cd4a
lru = {
next = 0x894cf075894ce86d,
prev = 0x480000441f0ff87d
}
crash64> page
struct page {
unsigned long flags;
atomic_t _count;
union {
atomic_t _mapcount;
struct {
u16 inuse;
u16 objects;
};
};
union {
struct {
unsigned long private;
struct address_space *mapping;
};
spinlock_t ptl;
struct kmem_cache *slab;
struct page *first_page;
};
union {
unsigned long index;
void *freelist;
};
struct list_head lru;
}
SIZE: 56
crash64> page ffffffff8115cd4a
struct page {
flags = 18080545404407974671,
_count = {
counter = 1717986918
},
{
_mapcount = {
counter = 521088614
},
{
inuse = 11878,
objects = 7951
}
},
{
{
private = 5212072143751217284,
mapping = 0x894830ec8348e589
},
ptl = {
{
rlock = {
raw_lock = {
slock = 132
}
}
}
},
slab = 0x4855000000000084,
first_page = 0x4855000000000084
},
{
index = 9893529208125970525,
freelist = 0x894ce065894cd85d
},
lru = {
next = 0x894cf075894ce86d,
prev = 0x480000441f0ff87d
}
}
crash64> page.lru ffffffff8115cd4a
lru = {
next = 0x894cf075894ce86d,
prev = 0x480000441f0ff87d
}
여기서 slab 들을 살펴볼 수 있다.
crash64> kmem_cache
struct kmem_cache {
struct array_cache *array[4096];
unsigned int batchcount;
unsigned int limit;
unsigned int shared;
unsigned int buffer_size;
u32 reciprocal_buffer_size;
unsigned int flags;
unsigned int num;
unsigned int gfporder;
gfp_t gfpflags;
size_t colour;
unsigned int colour_off;
struct kmem_cache *slabp_cache;
unsigned int slab_size;
unsigned int dflags;
void (*ctor)(void *);
const char *name;
struct list_head next;
struct kmem_list3 *nodelists[1024];
}
SIZE: 41064
crash64> kmem_cache.slabp_cache
struct kmem_cache {
[32824] struct kmem_cache *slabp_cache;
}
crash64> kmem_cache.slabp_cache ffffffff8115cd4a
slabp_cache = 0xd4840f000000
crash64> kmem_cache.slab_size ffffffff8115cd4a
slab_size = 1288253696
다시한번 backtrace 내용을 살펴보자.
crash64> bt -f | grep -e free_block -A 25
[exception RIP: free_block+362]
RIP: ffffffff8115cd4a RSP: ffff88045f3a9cf0 RFLAGS: 00010046
RAX: ffffea000d89f490 RBX: ffff883fbf050d40 RCX: 000000000000100c
RDX: 0020000000000000 RSI: ffff881fbdbb9898 RDI: ffff8803de45e0c0
RBP: ffff88045f3a9d40 R8: 0000000000000000 R9: 0000000000000000
R10: 0000000000000001 R11: ffff8803e23d1788 R12: 0000000000000001
R13: ffff881fbdbb9898 R14: 0000000000000000 R15: ffffea0000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
ffff88045f3a9c48: ffffea0000000000 0000000000000000
ffff88045f3a9c58: ffff881fbdbb9898 0000000000000001
ffff88045f3a9c68: ffff88045f3a9d40 ffff883fbf050d40
ffff88045f3a9c78: ffff8803e23d1788 0000000000000001
ffff88045f3a9c88: 0000000000000000 0000000000000000
ffff88045f3a9c98: ffffea000d89f490 000000000000100c
ffff88045f3a9ca8: 0020000000000000 ffff881fbdbb9898
ffff88045f3a9cb8: ffff8803de45e0c0 ffffffffffffffff
ffff88045f3a9cc8: ffffffff8115cd4a 0000000000000010
ffff88045f3a9cd8: 0000000000010046 ffff88045f3a9cf0
ffff88045f3a9ce8: 0000000000000018 ffff881fbdbd8a40
ffff88045f3a9cf8: ffff8803e23d1000 000000000000100c
ffff88045f3a9d08: ffff8803de45e0c0 0000000000000282
ffff88045f3a9d18: ffff881fbdbb9880 ffff883fbf050d40
ffff88045f3a9d28: ffff881fbdbd8780 0000000000000001
ffff88045f3a9d38: ffff881fbdbb9898 ffff88045f3a9d90
ffff88045f3a9d48: ffffffff8115cfa1
#7 [ffff88045f3a9d48] drain_array at ffffffff8115cfa1
crash64> dis -l ffffffff8115cfa1
/usr/src/debug/kernel-2.6.39/linux-2.6.39-400.264.1.el6uek/mm/slab.c: 4072
0xffffffff8115cfa1 <drain_array+193>: mov (%rbx),%eax
일단 해당 코드는 아래와 같았다.
static inline struct kmem_cache *page_get_cache(struct page *page)
{
page = compound_head(page);
BUG_ON(!PageSlab(page)); <<<
return (struct kmem_cache *)page->lru.next;
즉, 해당 페이지에 Slab 플래그가 설정되어 있지 않아 BUG_ON 이 발동한 것으로, Slab 정보를 가져오는 데 문제가 발생하는 것으로 보인다.
RBX 에 대한 cache 정보를 확인해 보자.
crash64> kmem_cache ffff883fbf050d40
struct kmem_cache {
array = {0xffff881fbdbb9080, 0xffff881fbdbb9180, 0xffff881fbdbb9280, 0xffff881fbdbb9380, 0xffff881fbdbb9480, 0xffff881fbdbb9580, 0xffff881fbdbb9680, 0xffff881fbdbb9780, 0xffff881fbdbb9880, 0xffff883fbfb085c0, 0xffff883fbfb086c0, 0xffff883fbfb087c0, 0xffff883fbfb088c0, 0xffff883fbfb089c0, 0xffff883fbf7a4e80, 0xffff883fbf7a4d80, 0xffff883fbf7a4c80, 0xffff883fbf7a4b80, 0xffff881fbdbb9980, 0xffff881fbdbb9a80, 0xffff881fbdbb9b80, 0xffff881fbdbb9c80, 0xffff881fbdbb9d80, 0xffff881fbdbb9e80, 0xffff881fbdaaf0c0, 0xffff881fbdaaf1c0, 0xffff881fbdaaf2c0, 0xffff883fbf7a4a80, 0xffff883fbf7a4980, 0xffff883fbf7a4880, 0xffff883fbf7a4780, 0xffff883fbf7a4680, 0xffff883fbf7a4580, 0xffff883fbf7a4480, 0xffff883fbf7a4380, 0xffff883fbf7a4280, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0...},
batchcount = 12,
limit = 24,
shared = 8,
buffer_size = 1128,
reciprocal_buffer_size = 3807596,
flags = 131072,
num = 3,
gfporder = 0,
gfpflags = 0,
colour = 10,
colour_off = 64,
slabp_cache = 0x0,
slab_size = 64,
dflags = 0,
ctor = 0x0,
name = 0xffffffffa00d772b "ext4_io_end",
next = {
next = 0xffff883fbf048d58,
prev = 0xffff883fbf068dd8
},
nodelists = {0xffff881fbdbd8740, 0xffff883fbffb8440, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0...}
}
역시 주요 메모리 번지를 뻘겋게 처리 했다.
우린 여기서 해당 RBX 의 cache 명은 ext4_io_end 임을 알 수 있고, I/O 종료처리라고 짐작할 수 있다.
해당 노드의 리스트를 확인해 보아야만 한다.
crash64> kmem_list3 0xffff881fbdbd8740
struct kmem_list3 {
slabs_partial = {
next = 0xffff8803e435c1c0,
prev = 0xffff8803de553200
},
slabs_full = {
next = 0xffff881fbdbd8750,
prev = 0xffff881fbdbd8750
},
slabs_free = {
next = 0xffff8803de783140,
prev = 0xffff8803de783140
},
free_objects = 7,
free_limit = 231,
colour_next = 2,
list_lock = {
{
rlock = {
raw_lock = {
slock = 2771625265
}
}
}
},
shared = 0xffff881fbdbfc000,
alien = 0xffff881fbdb49a40,
next_reap = 41246567727,
free_touched = 0
}
crash64> list -h 0xffff881fbdbd8740
ffff881fbdbd8740
ffff8803e435c1c0
ffff8803de6fe080
ffff8803de553200
crash64> list -h 0xffff8803e435c1c0
ffff8803e435c1c0
ffff8803de6fe080
ffff8803de553200
ffff881fbdbd8740
crash64>
slab 의 구성을 따라가 보자.
crash64> slab 0xffff881fbdbd8740
struct slab {
{
{
list = {
next = 0xffff8803e435c1c0,
prev = 0xffff8803de553200
},
colouroff = 18446612268641519440,
s_mem = 0xffff881fbdbd8750,
inuse = 3732418880,
free = 4294936579,
nodeid = 12608
},
__slab_cover_slab_rcu = {
head = {
next = 0xffff8803e435c1c0,
func = 0xffff8803de553200
},
cachep = 0xffff881fbdbd8750,
addr = 0xffff881fbdbd8750
}
}
}
crash64> slab 0xffff8803e435c1c0
struct slab {
{
{
list = {
next = 0xffff8803de6fe080,
prev = 0xffff881fbdbd8740
},
colouroff = 512,
s_mem = 0xffff8803e435c200,
inuse = 2,
free = 1,
nodeid = 0
},
__slab_cover_slab_rcu = {
head = {
next = 0xffff8803de6fe080,
func = 0xffff881fbdbd8740
},
cachep = 0x200,
addr = 0xffff8803e435c200
}
}
}
crash64> slab 0xffff8803de6fe080
struct slab {
{
{
list = {
next = 0xffff8803de553200,
prev = 0xffff8803e435c1c0
},
colouroff = 192,
s_mem = 0xffff8803de6fe0c0,
inuse = 2,
free = 1,
nodeid = 0
},
__slab_cover_slab_rcu = {
head = {
next = 0xffff8803de553200,
func = 0xffff8803e435c1c0
},
cachep = 0xc0,
addr = 0xffff8803de6fe0c0
}
}
}
crash64> slab 0xffff8803de553200
struct slab {
{
{
list = {
next = 0xffff881fbdbd8740, <<< Loop
prev = 0xffff8803de6fe080
},
colouroff = 576,
s_mem = 0xffff8803de553240,
inuse = 1,
free = 0,
nodeid = 0
},
__slab_cover_slab_rcu = {
head = {
next = 0xffff881fbdbd8740,
func = 0xffff8803de6fe080
},
cachep = 0x240,
addr = 0xffff8803de553240
}
}
}
crash64> slab 0xffff881fbdbd8740
struct slab {
{
{
list = {
next = 0xffff8803e435c1c0,
prev = 0xffff8803de553200
},
colouroff = 18446612268641519440,
s_mem = 0xffff881fbdbd8750,
inuse = 3732418880,
free = 4294936579,
nodeid = 12608
},
__slab_cover_slab_rcu = {
head = {
next = 0xffff8803e435c1c0,
func = 0xffff8803de553200
},
cachep = 0xffff881fbdbd8750,
addr = 0xffff881fbdbd8750
}
}
}
crash64> slab 0xffff8803de783140
struct slab {
{
{
list = {
next = 0xffff881fbdbd8760,
prev = 0xffff881fbdbd8760
},
colouroff = 384,
s_mem = 0xffff8803de783180,
inuse = 0,
free = 1,
nodeid = 0
},
__slab_cover_slab_rcu = {
head = {
next = 0xffff881fbdbd8760,
func = 0xffff881fbdbd8760
},
cachep = 0x180,
addr = 0xffff8803de783180
}
}
}
crash64>
여기서 slabs_free 와 slabs_full 은 상당히 중요하다.
캐시가 삭제될 때 slabs_free 와 slabs_full, slabs_partial 에서 리스트를 관리한다.
즉, 해제가 될 slab 의 경우 free 로 이동하고 reaping 되는데,
잘 보면 이게 계속 루프되고 있음을 알 수 있다.
이는 사실상 Async/Direct IO 상태로 사용시 ext4 의 io end 처리 구조체 및 펑션에서
더블프리가 될 수 있는 버그로 알려져있으며 패치가 나와있다.
Fixed -> UEK2 2.6.39-400.277.1
'Skills > System' 카테고리의 다른 글
Swappiness 에 대해서 정확히 알고갑시다. (0) | 2020.03.23 |
---|---|
How to mirroring to ULN repository on OL6 ? (0) | 2020.02.05 |
User-space bootless patching - Ksplice (0) | 2016.12.01 |
The Leap second (2) | 2015.06.20 |
Kernel dump analysis about the bug called as "devide by zero" (0) | 2014.01.06 |