# 处理节点资源告警之磁盘余量不足:
# 步骤1 查看根目录存储
root@test-node-003:~# df -h /
Filesystem Size Used Avail Use% Mounted on
/dev/vda2 197G 149G 40G 80% /
# 步骤2 清理未使用容器镜像
root@test-node-003:~# crictl --timeout 100s rmi --prune
# 步骤4 若空间仍然不足,继续排查其他磁盘占用(A:日志;B:容器)
# 步骤4.A 查看日志占用,并清空已轮转的旧日志(如 syslog.1)
root@test-node-002:~# du -sh /var/log/* | grep G
4.0G /var/log/journal
2.3G /var/log/pods
46G /var/log/syslog
60G /var/log/syslog.1
root@test-node-002:~# truncate -s 0 /var/log/syslog.1
root@test-node-002:~# du -sh /var/log/* | grep G
4.0G /var/log/journal
2.3G /var/log/pods
46G /var/log/syslog
# 步骤4.B 查看容器占用
root@test-node-001:~# du -sh /var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/* | grep G
2.2G /var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/1506
1.2G /var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/152160
du: cannot access '/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/216895': No such file or directory
du: cannot access '/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/216896': No such file or directory
1.9G /var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/220397
root@test-node-001:~# ls /var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/152160
fs work
- 执行 `df -h /` 查看根目录的挂载与使用情况。如上所示,Use% 显示已使用 80%,需要清理根目录的磁盘占用。
- 执行 `crictl --timeout 100s rmi --prune` 清理未使用的容器镜像(曾经使用过、但当前没有任何容器在使用的镜像)。
- 执行后再次运行 `df -h /` 查看使用率是否已降到足够低:如果降到 60% 以下,即可结束清理;如果下降不到 10%,说明磁盘很快仍会被占满,需要继续按后续步骤排查清理。
- 继续排查清理:
使用如下脚本列出每个 overlay 挂载的 upperdir(容器可写层)及其磁盘占用,再根据 upperdir 反查对应的容器。下面以占用 118M 的 upperdir 为例:
#!/bin/bash
# Report the disk usage of the upperdir of every overlay mount.
# Output: one "Upperdir: <path> , Size: <size>" line per mount entry that
# carries an upperdir= option; entries without one are skipped.

# Print the upperdir path and its `du -sh` size for a single mount entry.
# Arguments: $1 - one line of `mount` output
report_upperdir() {
  local entry=$1
  local layer usage

  # Everything after "upperdir=" up to the next comma is the path.
  layer=$(grep -oP 'upperdir=\K[^,]+' <<<"$entry")
  if [ -z "$layer" ]; then
    return 0
  fi

  # du errors are discarded, so the size is left empty when du cannot
  # read the directory.
  usage=$(du -sh "$layer" 2>/dev/null | awk '{print $1}')
  echo "Upperdir: $layer , Size: $usage"
}

# Feed every mount entry mentioning "overlay" through the reporter.
while read -r entry; do
  report_upperdir "$entry"
done < <(mount | grep overlay)
执行后:
Upperdir: /var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/70491/fs , Size: 118M
root@test-node-004:~# mount | grep /var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/70491/fs
overlay on /run/containerd/io.containerd.runtime.v2.task/k8s.io/6a163a87e0b8f4c7415a487051cd7bdcbdb7f2038b2a614f6ca134f03de0ad3e/rootfs type overlay (rw,relatime,lowerdir=/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/70490/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/70489/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/70488/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/70487/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/70486/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/70485/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/70484/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/41205/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/41204/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/41203/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/41202/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/31318/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/31317/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/31314/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/72/fs,upperdir=/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/70491/fs,workdir=/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/70491/work)
root@test-node-004:~#
root@test-node-004:~#
root@test-node-004:~#
root@test-node-004:~# crictl ps --no-trunc | grep 6a163
6a163a87e0b8f4c7415a487051cd7bdcbdb7f2038b2a614f6ca134f03de0ad3e sha256:fc7144f7e74403c60fe428b8236640e37b1e7a31b9969492c88332104aeadb31 2 weeks ago Running dbprovider-frontend 0 e705e7d956e66c7c13ae76058caaa53ef3168384dead52e10ed9124018ea8e03 dbprovider-frontend-646ddb74dc-slvjz
通过 `mount | grep $upper_dir` 可以看到该 upperdir 对应的挂载目录为:/run/containerd/io.containerd.runtime.v2.task/k8s.io/6a163a87e0b8f4c7415a487051cd7bdcbdb7f2038b2a614f6ca134f03de0ad3e/rootfs
路径 /run/containerd/io.containerd.runtime.v2.task/k8s.io/xxx 中的 xxx 这一串字符即为容器 id。
在该节点上执行 crictl ps | grep <容器id前几位> 即可筛选出具体的容器,例如:`crictl ps | grep 6a163`。从输出中可以看到容器名称 dbprovider-frontend 以及 pod 名称 dbprovider-frontend-646ddb74dc-slvjz
crictl inspect <容器id> | jq -r '.info.pid' 可以查看该容器在宿主机上对应的 pid
- 如果以上步骤仍然无法清理出足够的空间,则扩容系统盘。