LMKD 介绍
lmkd(Low Memory Killer Daemon)是低内存终止守护进程,用来监控运行中的Android系统的内存状态,在内存压力较高时通过终止最不必要的进程来释放内存,使系统以可接受的水平运行。
启动
lmkd是属于core组服务进程,rc的配置如下:
service lmkd /system/bin/lmkd
class core
user lmkd
group lmkd system readproc
capabilities DAC_OVERRIDE KILL IPC_LOCK SYS_NICE SYS_RESOURCE
critical
socket lmkd seqpacket+passcred 0660 system system
writepid /dev/cpuset/system-background/tasks
on property:lmkd.reinit=1
exec_background /system/bin/lmkd --reinit
我们看下main函数主要做了哪些工作:
- 属性值的初始化函数update_props(),读取设备中的属性配置,属性未设置时使用代码中的默认值
c++ 代码解读复制代码static void update_props() {
/* By default disable low level vmpressure events */
level_oomadj[VMPRESS_LEVEL_LOW] =
property_get_int32("ro.lmk.low", OOM_SCORE_ADJ_MAX + 1);
level_oomadj[VMPRESS_LEVEL_MEDIUM] =
property_get_int32("ro.lmk.medium", 800);
level_oomadj[VMPRESS_LEVEL_CRITICAL] =
property_get_int32("ro.lmk.critical", 0);
debug_process_killing = property_get_bool("ro.lmk.debug", false);
/* By default disable upgrade/downgrade logic */
enable_pressure_upgrade =
property_get_bool("ro.lmk.critical_upgrade", false);
upgrade_pressure =
(int64_t)property_get_int32("ro.lmk.upgrade_pressure", 100);
downgrade_pressure =
(int64_t)property_get_int32("ro.lmk.downgrade_pressure", 100);
kill_heaviest_task =
property_get_bool("ro.lmk.kill_heaviest_task", false);
low_ram_device = property_get_bool("ro.config.low_ram", false);
kill_timeout_ms =
(unsigned long)property_get_int32("ro.lmk.kill_timeout_ms", 100);
use_minfree_levels =
property_get_bool("ro.lmk.use_minfree_levels", false);
per_app_memcg =
property_get_bool("ro.config.per_app_memcg", low_ram_device);
swap_free_low_percentage = clamp(0, 100, property_get_int32("ro.lmk.swap_free_low_percentage",
DEF_LOW_SWAP));
psi_partial_stall_ms = property_get_int32("ro.lmk.psi_partial_stall_ms",
low_ram_device ? DEF_PARTIAL_STALL_LOWRAM : DEF_PARTIAL_STALL);
psi_complete_stall_ms = property_get_int32("ro.lmk.psi_complete_stall_ms",
DEF_COMPLETE_STALL);
thrashing_limit_pct = max(0, property_get_int32("ro.lmk.thrashing_limit",
low_ram_device ? DEF_THRASHING_LOWRAM : DEF_THRASHING));
thrashing_limit_decay_pct = clamp(0, 100, property_get_int32("ro.lmk.thrashing_limit_decay",
low_ram_device ? DEF_THRASHING_DECAY_LOWRAM : DEF_THRASHING_DECAY));
thrashing_critical_pct = max(0, property_get_int32("ro.lmk.thrashing_limit_critical",
thrashing_limit_pct * 2));
}
- create_android_logger 创建android event logtag
/* Android Logger event logtags (see event.logtags) */
#define KILLINFO_LOG_TAG 10195355
# for killinfo logs
10195355 killinfo (Pid|1|5),(Uid|1|5),(OomAdj|1),(MinOomAdj|1),(TaskSize|1),(enum kill_reasons|1|5),(MemFree|1),(Cached|1),(SwapCached|1),(Buffers|1),(Shmem|1),(Unevictable|1),(SwapTotal|1),(SwapFree|1),(ActiveAnon|1),(InactiveAnon|1),(ActiveFile|1),(InactiveFile|1),(SReclaimable|1),(SUnreclaim|1),(KernelStack|1),(PageTables|1),(IonHeap|1),(IonHeapPool|1),(CmaFree|1),(MsSinceEvent|1),(MsSincePrevWakeup|1),(WakeupsSinceEvent|1),(SkippedWakeups|1)
- 执行init()操作,初始化epoll事件监听,创建socket “/dev/socket/lmkd” 并监听socket 连接,当有客户端连接时,回调函数ctrl_connect_handler,注意这里我们用的是用户空间的lmkd,并不是内核空间的实现,这里use_inkernel_interface 是false
has_inkernel_module = !access(INKERNEL_MINFREE_PATH, W_OK);
use_inkernel_interface = has_inkernel_module;
if (use_inkernel_interface) {
ALOGI("Using in-kernel low memory killer interface");
if (init_poll_kernel()) {
epev.events = EPOLLIN;
epev.data.ptr = (void*)&kernel_poll_hinfo;
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, kpoll_fd, &epev) != 0) {
ALOGE("epoll_ctl for lmk events failed (errno=%d)", errno);
close(kpoll_fd);
kpoll_fd = -1;
} else {
maxevents++;
/* let the others know it does support reporting kills */
property_set("sys.lmk.reportkills", "1");
}
}
} else {
if (!init_monitors()) {
return -1;
}
/* let the others know it does support reporting kills */
property_set("sys.lmk.reportkills", "1");
}
- init_monitors()函数中主要调用了init_psi_monitors函数,init_psi_monitors函数主要调用init_mp_psi函数进行psi监控初始化
c++ 代码解读复制代码static bool init_mp_psi(enum vmpressure_level level, bool use_new_strategy) {
int fd;
/* Do not register a handler if threshold_ms is not set */
if (!psi_thresholds[level].threshold_ms) {
return true;
}
fd = init_psi_monitor(psi_thresholds[level].stall_type,
psi_thresholds[level].threshold_ms * US_PER_MS,
PSI_WINDOW_SIZE_MS * US_PER_MS);
if (fd < 0) {
return false;
}
vmpressure_hinfo[level].handler = use_new_strategy ? mp_event_psi : mp_event_common;
vmpressure_hinfo[level].data = level;
if (register_psi_monitor(epollfd, fd, &vmpressure_hinfo[level]) < 0) {
destroy_psi_monitor(fd);
return false;
}
maxevents++;
mpevfd[level] = fd;
return true;
}
- init_psi_monitor 往节点"/proc/pressure/memory"中写入stall_type对应的名称、threshold_us和window_us(init_mp_psi传入的window_us是PSI_WINDOW_SIZE_MS * US_PER_MS)
fd = TEMP_FAILURE_RETRY(open(PSI_MON_FILE_MEMORY, O_WRONLY | O_CLOEXEC));
if (fd < 0) {
ALOGE("No kernel psi monitor support (errno=%d)", errno);
return -1;
}
switch (stall_type) {
case (PSI_SOME):
case (PSI_FULL):
res = snprintf(buf, sizeof(buf), "%s %d %d",
stall_type_name[stall_type], threshold_us, window_us);
break;
default:
ALOGE("Invalid psi stall type: %d", stall_type);
errno = EINVAL;
goto err;
}
if (res >= (ssize_t)sizeof(buf)) {
ALOGE("%s line overflow for psi stall type '%s'",
PSI_MON_FILE_MEMORY, stall_type_name[stall_type]);
errno = EINVAL;
goto err;
}
res = TEMP_FAILURE_RETRY(write(fd, buf, strlen(buf) + 1));
if (res < 0) {
ALOGE("%s write failed for psi stall type '%s'; errno=%d",
PSI_MON_FILE_MEMORY, stall_type_name[stall_type], errno);
goto err;
}
- main函数最后进入loop循环,epoll_wait等待系统内存压力上报
启动总结
总结一下启动顺序,这里由于都是在同一个进程和文件中的调用我们直接将函数作为时序图的节点信息
如何杀进程来释放内存
我们要区分两个handler:
- 用于处理和AMS的通信的handler:ctrl_data_handler
- 用于处理memory压力上报的handler:mp_event_common
这里我们要引入两个关于内存信息的节点,在函数mp_event_common中会先处理meminfo和zoneinfo的两个节点信息:
#define ZONEINFO_PATH "/proc/zoneinfo"
#define MEMINFO_PATH "/proc/meminfo"
// 这里解析meminfo和zoneinfo
if (meminfo_parse(&mi) < 0 || zoneinfo_parse(&zi) < 0) {
ALOGE("Failed to get free memory!");
return;
}
我们看下meminfo_parse函数主要做什么:
c++ 代码解读复制代码static int meminfo_parse(union meminfo *mi) {
static struct reread_data file_data = {
.filename = MEMINFO_PATH,
.fd = -1,
};
char *buf;
char *save_ptr;
char *line;
memset(mi, 0, sizeof(union meminfo));
if ((buf = reread_file(&file_data)) == NULL) {
return -1;
}
for (line = strtok_r(buf, "\n", &save_ptr); line;
line = strtok_r(NULL, "\n", &save_ptr)) {
if (!meminfo_parse_line(line, mi)) {
ALOGE("%s parse error", file_data.filename);
return -1;
}
}
mi->field.nr_file_pages = mi->field.cached + mi->field.swap_cached +
mi->field.buffers;
return 0;
}
下面看下zoneinfo_parse函数:
/* calculate totals fields */
for (node_idx = 0; node_idx < zi->node_count; node_idx++) {
node = &zi->nodes[node_idx];
for (zone_idx = 0; zone_idx < node->zone_count; zone_idx++) {
struct zoneinfo_zone *zone = &zi->nodes[node_idx].zones[zone_idx];
zi->totalreserve_pages += zone->max_protection + zone->fields.field.high;
}
zi->total_inactive_file += node->fields.field.nr_inactive_file;
zi->total_active_file += node->fields.field.nr_active_file;
zi->total_workingset_refault += node->fields.field.workingset_refault;
}
继续往下看
if (use_minfree_levels) { // use_minfree_levels 为true
int i;
// other_free表示系统中可用内存,从meminfo和zoneinfo中算出来
// mi.field.nr_free_pages 代表meminfo中的MemFree,表示当前系统空闲内存大小,是完全没有被占用的内存
//zi.totalreserve_pages 是zoneinfo中的max_protection + high,其中max_protection在android中为0
other_free = mi.field.nr_free_pages - zi.totalreserve_pages;
//nr_file_pages = Cached + SwapCached + Buffers 缓存页总和
if (mi.field.nr_file_pages > (mi.field.shmem + mi.field.unevictable + mi.field.swap_cached)) {
// other_file = Cached + SwapCached + Buffers - Shmem - Unevictable - SwapCached
other_file = (mi.field.nr_file_pages - mi.field.shmem -
mi.field.unevictable - mi.field.swap_cached);
} else {
other_file = 0;
}
// 根据上面算出来的可用内存 来判断目标min_score_adj
min_score_adj = OOM_SCORE_ADJ_MAX + 1; // 1000 + 1
for (i = 0; i < lowmem_targets_size; i++) {
minfree = lowmem_minfree[i];
if (other_free < minfree && other_file < minfree) {
min_score_adj = lowmem_adj[i];
break;
}
}
// 如果目标adj 等于1001 也就没有意义了 跳过这次kill
if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
if (debug_process_killing) {
ALOGI("Ignore %s memory pressure event "
"(free memory=%ldkB, cache=%ldkB, limit=%ldkB)",
level_name[level], other_free * page_k, other_file * page_k,
(long)lowmem_minfree[lowmem_targets_size - 1] * page_k);
}
return;
}
goto do_kill;
}
我们找到min_score_adj之后就goto do_kill了,这里继续调用find_and_kill_process函数:
// choose_heaviest_task 为false时,一旦遍历到的adj(i)小于等于PERCEPTIBLE_APP_ADJ(200),就会把choose_heaviest_task 置为true
for (i = OOM_SCORE_ADJ_MAX; i >= min_score_adj; i--) {
struct proc *procp;
if (!choose_heaviest_task && i <= PERCEPTIBLE_APP_ADJ) {
/*
* If we have to choose a perceptible process, choose the heaviest one to
* hopefully minimize the number of victims.
*/
choose_heaviest_task = true;
}
// 如果是choose_heaviest_task 则通过proc_get_heaviest(i)寻找tasksize最大的那个,否则通过proc_adj_lru(i)取一个即可;tasksize读取自/proc/<pid>/statm,例如/proc/1586/statm
while (true) {
procp = choose_heaviest_task ?
proc_get_heaviest(i) : proc_adj_lru(i);
if (!procp)
break;
killed_size = kill_one_process(procp, min_score_adj, kill_reason, kill_desc,
mi, wi, tm);
if (killed_size >= 0) {
if (!lmk_state_change_start) {
lmk_state_change_start = true;
stats_write_lmk_state_changed(
android::lmkd::stats::LMK_STATE_CHANGED__STATE__START);
}
break;
}
}
if (killed_size) {
break;
}
}
// 获取heaviest proc
/*
 * Walk the circular list of processes registered under `oomadj` and return
 * the one for which proc_get_size() reports the largest value.
 *
 * Entries whose size comes back <= 0 are dropped via pid_remove() during the
 * walk. Returns NULL when no entry with a positive size is found.
 */
static struct proc *proc_get_heaviest(int oomadj) {
    struct adjslot_list *const sentinel = &procadjslot_list[ADJTOSLOT(oomadj)];
    struct proc *heaviest = NULL;
    int heaviest_size = 0;
    struct adjslot_list *node = sentinel->next;

    while (node != sentinel) {
        /* Grab the successor up front: pid_remove() below may drop `node` */
        struct adjslot_list *following = node->next;
        /* NOTE(review): the cast assumes the list node is the first member of struct proc - confirm */
        struct proc *candidate = (struct proc *)node;
        int pid = candidate->pid;
        int size = proc_get_size(pid);

        if (size <= 0) {
            pid_remove(pid);
        } else if (size > heaviest_size) {
            heaviest_size = size;
            heaviest = candidate;
        }

        node = following;
    }

    return heaviest;
}
// 获取proc size
/*
 * Return the second field of /proc/<pid>/statm for the given pid (the
 * resident set size; proc(5) documents statm fields as page counts).
 *
 * Returns -1 if the file cannot be opened, cannot be read, or does not
 * contain two leading integers. Callers that test the result with `<= 0`
 * keep working unchanged: a malformed line used to yield 0 and now yields -1.
 */
static int proc_get_size(int pid) {
    char path[PATH_MAX];
    char line[LINE_MAX];
    int fd;
    int rss = 0;
    ssize_t ret;

    /* gid containing AID_READPROC required */
    snprintf(path, PATH_MAX, "/proc/%d/statm", pid);
    fd = open(path, O_RDONLY | O_CLOEXEC);
    if (fd == -1)
        return -1;

    /* Leave room for the terminator added below */
    ret = read_all(fd, line, sizeof(line) - 1);
    close(fd);
    if (ret < 0)
        return -1;
    line[ret] = '\0';

    /*
     * Skip the first field (total size) and keep the second (rss).
     * A suppressed conversion is not counted, so success is exactly 1.
     */
    if (sscanf(line, "%*d %d", &rss) != 1)
        return -1;

    return rss;
}
最后调用kill_one_process发送sigkill信号杀死进程,然后返回rss,作为size,这里放一张简单的图:
和AMS的交互
我们看下cmd
/*
* Supported LMKD commands
*/
/*
 * Command codes understood by lmkd. The numeric values are spelled out
 * because they are what is exchanged with clients.
 */
enum lmk_cmd {
    LMK_TARGET       = 0, /* Associate minfree with oom_adj_score */
    LMK_PROCPRIO     = 1, /* Register a process and set its oom_adj_score */
    LMK_PROCREMOVE   = 2, /* Unregister a process */
    LMK_PROCPURGE    = 3, /* Purge all registered processes */
    LMK_GETKILLCNT   = 4, /* Get number of kills */
    LMK_SUBSCRIBE    = 5, /* Subscribe for asynchronous events */
    LMK_PROCKILL     = 6, /* Unsolicited msg to subscribed clients on proc kills */
    LMK_UPDATE_PROPS = 7, /* Reinit properties */
};
我们先看下第一个cmd LMK_TARGET,就是写入min free和oom_adj_score:
[sys.lmk.minfree_levels]: [18432:0,23040:100,27648:200,32256:250,55296:900,80640:950]
我们看下面这个时序图,开机过程中是在wm.displayReady函数最终调用到updateOomLevels,然后将minfree写入到lmkd的节点中的:
我们看下代码:
private final int[] mOomAdj = new int[] {
FOREGROUND_APP_ADJ, VISIBLE_APP_ADJ, PERCEPTIBLE_APP_ADJ,
PERCEPTIBLE_LOW_APP_ADJ, CACHED_APP_MIN_ADJ, CACHED_APP_LMK_FIRST_ADJ
}; // 0,100,200,250,900,950
// These are the low-end OOM level limits. This is appropriate for an
// HVGA or smaller phone with less than 512MB. Values are in KB.
private final int[] mOomMinFreeLow = new int[] {
12288, 18432, 24576,
36864, 43008, 49152
};
// These are the high-end OOM level limits. This is appropriate for a
// 1280x800 or larger screen with around 1GB RAM. Values are in KB.
private final int[] mOomMinFreeHigh = new int[] {
73728, 92160, 110592,
129024, 147456, 184320
};
// 这里scale = 1.0
for (int i = 0; i < mOomAdj.length; i++) {
int low = mOomMinFreeLow[i];
int high = mOomMinFreeHigh[i];
if (is64bit) {
// Increase the high min-free levels for cached processes for 64-bit
if (i == 4) high = (high * 3) / 2;
else if (i == 5) high = (high * 7) / 4;
}
mOomMinFree[i] = (int)(low + ((high - low) * scale));
}
if (write) {
ByteBuffer buf = ByteBuffer.allocate(4 * (2 * mOomAdj.length + 1));
buf.putInt(LMK_TARGET);
for (int i = 0; i < mOomAdj.length; i++) {
buf.putInt((mOomMinFree[i] * 1024)/PAGE_SIZE); // 相当于除以4
buf.putInt(mOomAdj[i]);
}
writeLmkd(buf, null);
SystemProperties.set("sys.sysctl.extra_free_kbytes", Integer.toString(reserve));
mOomLevelsSet = true;
}
LMK_PROCPRIO用于在进程创建或oom adj更新时向lmkd注册进程并设置它的oom_adj_score,这个调用的地方比较多就不一一列举,调用入口updateOomAdjLocked:
java 代码解读复制代码
/**
 * Sends an LMK_PROCPRIO message (pid, uid, adj) to lmkd via writeLmkd().
 *
 * Nothing is sent when {@code pid} is not positive or {@code amt} is
 * UNKNOWN_ADJ. A warning is logged if the call took more than 250 ms.
 *
 * @param pid process id to register
 * @param uid uid of the process
 * @param amt oom adj value to set
 */
public static void setOomAdj(int pid, int uid, int amt) {
    // A non-positive pid indicates the process is not started yet, and an
    // unknown adj carries nothing to report; either way there is no need to proceed.
    if (pid <= 0 || amt == UNKNOWN_ADJ) {
        return;
    }

    final long startMs = SystemClock.elapsedRealtime();

    // Four 4-byte ints: command, pid, uid, adj.
    final ByteBuffer request = ByteBuffer.allocate(4 * 4);
    request.putInt(LMK_PROCPRIO)
            .putInt(pid)
            .putInt(uid)
            .putInt(amt);
    writeLmkd(request, null);

    final long elapsedMs = SystemClock.elapsedRealtime() - startMs;
    if (elapsedMs > 250) {
        Slog.w("ActivityManager", "SLOW OOM ADJ: " + elapsedMs + "ms for pid " + pid
                + " = " + amt);
    }
}
LMK_GETKILLCNT命令用来获取一个adj 区间内发生的kill次数,比如(0,900)之间:
c++ 代码解读复制代码private boolean reportLmkKillAtOrBelow(PrintWriter pw, int oom_adj) {
Integer cnt = ProcessList.getLmkdKillCount(0, oom_adj);
if (cnt != null) {
pw.println(" kills at or below oom_adj " + oom_adj + ": " + cnt);
return true;
}
return false;
}
我们也可以通过dumpsys命令来查看:
ACTIVITY MANAGER LMK KILLS (dumpsys activity lmk)
Total number of kills: 0
kills at or below oom_adj 999: 0
kills at or below oom_adj 900: 0
kills at or below oom_adj 800: 0
kills at or below oom_adj 700: 0
kills at or below oom_adj 600: 0
kills at or below oom_adj 500: 0
kills at or below oom_adj 400: 0
kills at or below oom_adj 300: 0
kills at or below oom_adj 250: 0
kills at or below oom_adj 200: 0
kills at or below oom_adj 100: 0
kills at or below oom_adj 0: 0
LMK_PROCREMOVE 是应用死亡之后, AMS 向LMKD报告,移除相关的数据结构
final void handleAppDiedLocked(ProcessRecord app,
boolean restarting, boolean allowRestart) {
int pid = app.pid;
boolean kept = cleanUpApplicationRecordLocked(app, restarting, allowRestart, -1,
false /*replacingPid*/);
if (!kept && !restarting) {
removeLruProcessLocked(app);
if (pid > 0) {
ProcessList.remove(pid); //这里向lmkd发送proc移除命令
}
}
//省略一万行代码
}
总结
- LMKD 初始化时,读取系统的配置属性决定自己的参数信息,然后注册两个handler,分别用于处理内核内存压力上报和AMS发来的cmd
- LMKD接收内核内存压力上报,然后结合当前内存的状态,根据minfree配置参数去杀进程
- AMS 负责Android侧的进程管理,调整adj的时候通过cmd发送给LMKD,维护在LMKD的链表中
评论记录:
回复评论: