# This file is part of Cockpit.
#
# Copyright (C) 2022 Red Hat, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import errno
import logging
import os
import re
from typing import Any, DefaultDict, Iterable, List, NamedTuple, Optional, Tuple

from cockpit._vendor.systemd_ctypes import Handle

USER_HZ = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
# Fall back to 100 ticks per second if sysconf doesn't report a positive value
MS_PER_JIFFY = 1000 / (USER_HZ if (USER_HZ > 0) else 100)

HWMON_PATH = '/sys/class/hwmon'

# we would like to do this, but mypy complains; https://github.com/python/mypy/issues/2900
# Samples = collections.defaultdict[str, Union[float, Dict[str, Union[float, None]]]]
Samples = DefaultDict[str, Any]

logger = logging.getLogger(__name__)


def read_int_file(rootfd: int, statfile: str, default: Optional[int] = None, key: bytes = b'') -> Optional[int]:
    """Read an integer value from `statfile`, opened relative to the directory fd `rootfd`.

    If `key` is given, the value is taken from the text following the first
    occurrence of `key`, up to the end of that line (or the end of the data
    that was read, if there is no further newline).

    A parsed value of 0 is replaced with `default`.

    Returns None if the file doesn't exist, can't be read, doesn't contain
    `key`, or the selected text isn't an integer.
    """
    # Not every stat is available, such as cpu.weight
    try:
        fd = os.open(statfile, os.O_RDONLY, dir_fd=rootfd)
    except FileNotFoundError:
        return None

    try:
        data = os.read(fd, 1024)
    except OSError as e:
        # cgroups can disappear between the open and read
        if e.errno != errno.ENODEV:
            logger.warning('Failed to read %s: %s', statfile, e)
        return None
    finally:
        os.close(fd)

    if key:
        key_pos = data.find(key)
        if key_pos == -1:
            # The file exists, but doesn't carry the requested field
            return None
        start = key_pos + len(key)
        end = data.find(b'\n', start)
        data = data[start:] if end == -1 else data[start:end]

    try:
        # 0 often means "none", so replace it with default value
        return int(data) or default
    except ValueError:
        # Some samples such as "memory.max" contains "max" when there is a no limit
        return None


class SampleDescription(NamedTuple):
    """Description of a single metric which a sampler produces."""
    name: str
    units: str
    semantics: str
    instanced: bool


class Sampler:
    """Base class for samplers.

    Subclasses list the metrics they produce in `descriptions` and implement
    `sample()`, which stores the current values into the given `samples`.
    """
    descriptions: List[SampleDescription]

    def sample(self, samples: Samples) -> None:
        raise NotImplementedError


class CPUSampler(Sampler):
    """Samples the CPU time counters from /proc/stat, in total and per core."""
    descriptions = [
        SampleDescription('cpu.basic.nice', 'millisec', 'counter', instanced=False),
        SampleDescription('cpu.basic.user', 'millisec', 'counter', instanced=False),
        SampleDescription('cpu.basic.system', 'millisec', 'counter', instanced=False),
        SampleDescription('cpu.basic.iowait', 'millisec', 'counter', instanced=False),

        SampleDescription('cpu.core.nice', 'millisec', 'counter', instanced=True),
        SampleDescription('cpu.core.user', 'millisec', 'counter', instanced=True),
        SampleDescription('cpu.core.system', 'millisec', 'counter', instanced=True),
        SampleDescription('cpu.core.iowait', 'millisec', 'counter', instanced=True),
    ]

    def sample(self, samples: Samples) -> None:
        with open('/proc/stat') as stat:
            for line in stat:
                if not line.startswith('cpu'):
                    continue
                cpu, user, nice, system, _idle, iowait = line.split()[:6]

                # Whatever follows "cpu" identifies the core; the aggregate line has nothing there
                core = cpu[3:]
                if core:
                    prefix = 'cpu.core'
                    samples[f'{prefix}.nice'][core] = int(nice) * MS_PER_JIFFY
                    samples[f'{prefix}.user'][core] = int(user) * MS_PER_JIFFY
                    samples[f'{prefix}.system'][core] = int(system) * MS_PER_JIFFY
                    samples[f'{prefix}.iowait'][core] = int(iowait) * MS_PER_JIFFY
                else:
                    prefix = 'cpu.basic'
                    samples[f'{prefix}.nice'] = int(nice) * MS_PER_JIFFY
                    samples[f'{prefix}.user'] = int(user) * MS_PER_JIFFY
                    samples[f'{prefix}.system'] = int(system) * MS_PER_JIFFY
                    samples[f'{prefix}.iowait'] = int(iowait) * MS_PER_JIFFY


class MemorySampler(Sampler):
    """Samples memory and swap usage from /proc/meminfo."""
    descriptions = [
        SampleDescription('memory.free', 'bytes', 'instant', instanced=False),
        SampleDescription('memory.used', 'bytes', 'instant', instanced=False),
        SampleDescription('memory.cached', 'bytes', 'instant', instanced=False),
        SampleDescription('memory.swap-used', 'bytes', 'instant', instanced=False),
    ]

    def sample(self, samples: Samples) -> None:
        with open('/proc/meminfo') as meminfo:
            # Each line is "Name:   value kB"; keep the bare number, keyed by name
            items = {k: int(v.strip(' kB\n')) for line in meminfo for k, v in [line.split(':', 1)]}

        samples['memory.free'] = 1024 * items['MemFree']
        samples['memory.used'] = 1024 * (items['MemTotal'] - items['MemAvailable'])
        samples['memory.cached'] = 1024 * (items['Buffers'] + items['Cached'])
        samples['memory.swap-used'] = 1024 * (items['SwapTotal'] - items['SwapFree'])


class CPUTemperatureSampler(Sampler):
    """Samples CPU temperatures from the hwmon sensors found below HWMON_PATH."""
    # Cache found sensors, as they can't be hotplugged.
    sensors: Optional[List[str]] = None

    descriptions = [
        SampleDescription('cpu.temperature', 'celsius', 'instant', instanced=True),
    ]

    @staticmethod
    def detect_cpu_sensors(dir_fd: int) -> Iterable[str]:
        """Yield the names of the '*_input' files in the hwmon directory `dir_fd` which belong to a CPU sensor.

        Yields nothing if the directory has no 'name' file, or if the name is
        not one of the known CPU sensor drivers.
        """
        # Read the name file to decide what to do with this directory
        try:
            with Handle.open('name', os.O_RDONLY, dir_fd=dir_fd) as fd:
                name = os.read(fd, 1024).decode().strip()
        except FileNotFoundError:
            return

        if name == 'atk0110':
            # only sample 'CPU Temperature' in atk0110
            predicate = (lambda label: label == 'CPU Temperature')
        elif name == 'cpu_thermal':
            # labels are not used on ARM
            predicate = None
        elif name == 'coretemp':
            # accept all labels on Intel
            predicate = None
        elif name in ['k8temp', 'k10temp']:
            predicate = None
        else:
            # Not a CPU sensor
            return

        # Now scan the directory for inputs
        for input_filename in os.listdir(dir_fd):
            if not input_filename.endswith('_input'):
                continue

            if predicate:
                # We need to check the label
                try:
                    label_filename = input_filename.replace('_input', '_label')
                    with Handle.open(label_filename, os.O_RDONLY, dir_fd=dir_fd) as fd:
                        label = os.read(fd, 1024).decode().strip()
                except FileNotFoundError:
                    continue

                if not predicate(label):
                    continue

            yield input_filename

    @staticmethod
    def scan_sensors() -> Iterable[str]:
        """Yield the full paths of all CPU temperature input files below HWMON_PATH.

        Yields nothing if HWMON_PATH doesn't exist.
        """
        try:
            top_fd = Handle.open(HWMON_PATH, os.O_RDONLY | os.O_DIRECTORY)
        except FileNotFoundError:
            return

        with top_fd:
            for hwmon_name in os.listdir(top_fd):
                with Handle.open(hwmon_name, os.O_RDONLY | os.O_DIRECTORY, dir_fd=top_fd) as subdir_fd:
                    for sensor in CPUTemperatureSampler.detect_cpu_sensors(subdir_fd):
                        yield f'{HWMON_PATH}/{hwmon_name}/{sensor}'

    def sample(self, samples: Samples) -> None:
        # The scan happens only once; the result is remembered on the instance
        if self.sensors is None:
            self.sensors = list(CPUTemperatureSampler.scan_sensors())

        for sensor_path in self.sensors:
            with open(sensor_path) as sensor:
                temperature = int(sensor.read().strip())

            if temperature == 0:
                # Don't report a reading of 0, but still sample the remaining sensors
                continue

            # The raw value is divided by 1000 to produce the 'celsius' unit of the description
            samples['cpu.temperature'][sensor_path] = temperature / 1000


class DiskSampler(Sampler):
    """Samples read/written byte counters from /proc/diskstats, per device and in total.

    mdraid and device-mapper devices as well as partitions are left out.
    """
    descriptions = [
        SampleDescription('disk.all.read', 'bytes', 'counter', instanced=False),
        SampleDescription('disk.all.written', 'bytes', 'counter', instanced=False),
        SampleDescription('disk.dev.read', 'bytes', 'counter', instanced=True),
        SampleDescription('disk.dev.written', 'bytes', 'counter', instanced=True),
    ]

    def sample(self, samples: Samples) -> None:
        with open('/proc/diskstats') as diskstats:
            all_read_bytes = 0
            all_written_bytes = 0

            for line in diskstats:
                # https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats
                fields = line.strip().split()
                dev_major = fields[0]
                dev_name = fields[2]
                num_sectors_read = fields[5]
                num_sectors_written = fields[9]

                # ignore mdraid
                if dev_major == '9':
                    continue

                # ignore device-mapper
                if dev_name.startswith('dm-'):
                    continue

                # Skip partitions
                if dev_name[:2] in ['sd', 'hd', 'vd'] and dev_name[-1].isdigit():
                    continue

                # Ignore nvme partitions
                if dev_name.startswith('nvme') and 'p' in dev_name:
                    continue

                read_bytes = int(num_sectors_read) * 512
                written_bytes = int(num_sectors_written) * 512

                all_read_bytes += read_bytes
                all_written_bytes += written_bytes

                samples['disk.dev.read'][dev_name] = read_bytes
                samples['disk.dev.written'][dev_name] = written_bytes

            samples['disk.all.read'] = all_read_bytes
            samples['disk.all.written'] = all_written_bytes


class CGroupSampler(Sampler):
    """Samples memory and CPU statistics for each cgroup below /sys/fs/cgroup.

    Both layouts are handled: the unified (v2) hierarchy, and the separate
    'memory' and 'cpu' hierarchies otherwise.
    """
    descriptions = [
        SampleDescription('cgroup.memory.usage', 'bytes', 'instant', instanced=True),
        SampleDescription('cgroup.memory.limit', 'bytes', 'instant', instanced=True),
        SampleDescription('cgroup.memory.sw-usage', 'bytes', 'instant', instanced=True),
        SampleDescription('cgroup.memory.sw-limit', 'bytes', 'instant', instanced=True),
        SampleDescription('cgroup.cpu.usage', 'millisec', 'counter', instanced=True),
        SampleDescription('cgroup.cpu.shares', 'count', 'instant', instanced=True),
    ]

    # Detected on the first call to sample()
    cgroups_v2: Optional[bool] = None

    def sample(self, samples: Samples) -> None:
        if self.cgroups_v2 is None:
            self.cgroups_v2 = os.path.exists('/sys/fs/cgroup/cgroup.controllers')

        if self.cgroups_v2:
            cgroups_v2_path = '/sys/fs/cgroup/'
            for path, _, _, rootfd in os.fwalk(cgroups_v2_path):
                # Strip only the leading root; this is empty for the root directory itself
                cgroup = path[len(cgroups_v2_path):]

                if not cgroup:
                    continue

                samples['cgroup.memory.usage'][cgroup] = read_int_file(rootfd, 'memory.current', 0)
                samples['cgroup.memory.limit'][cgroup] = read_int_file(rootfd, 'memory.max')
                samples['cgroup.memory.sw-usage'][cgroup] = read_int_file(rootfd, 'memory.swap.current', 0)
                samples['cgroup.memory.sw-limit'][cgroup] = read_int_file(rootfd, 'memory.swap.max')
                samples['cgroup.cpu.shares'][cgroup] = read_int_file(rootfd, 'cpu.weight')
                usage_usec = read_int_file(rootfd, 'cpu.stat', 0, key=b'usage_usec')
                if usage_usec:
                    samples['cgroup.cpu.usage'][cgroup] = usage_usec / 1000
        else:
            memory_path = '/sys/fs/cgroup/memory/'
            for path, _, _, rootfd in os.fwalk(memory_path):
                cgroup = path[len(memory_path):]

                if not cgroup:
                    continue

                samples['cgroup.memory.usage'][cgroup] = read_int_file(rootfd, 'memory.usage_in_bytes', 0)
                samples['cgroup.memory.limit'][cgroup] = read_int_file(rootfd, 'memory.limit_in_bytes')
                samples['cgroup.memory.sw-usage'][cgroup] = read_int_file(rootfd, 'memory.memsw.usage_in_bytes', 0)
                samples['cgroup.memory.sw-limit'][cgroup] = read_int_file(rootfd, 'memory.memsw.limit_in_bytes')

            cpu_path = '/sys/fs/cgroup/cpu/'
            for path, _, _, rootfd in os.fwalk(cpu_path):
                cgroup = path[len(cpu_path):]

                if not cgroup:
                    continue

                samples['cgroup.cpu.shares'][cgroup] = read_int_file(rootfd, 'cpu.shares')
                usage_nsec = read_int_file(rootfd, 'cpuacct.usage')
                if usage_nsec:
                    samples['cgroup.cpu.usage'][cgroup] = usage_nsec / 1000000


class CGroupDiskIO(Sampler):
    """Samples disk I/O per cgroup, by summing up /proc/[pid]/io of all processes by their cgroup."""
    IO_RE = re.compile(rb'\bread_bytes: (?P<read>\d+).*\nwrite_bytes: (?P<write>\d+)', flags=re.S)
    descriptions = [
        SampleDescription('disk.cgroup.read', 'bytes', 'counter', instanced=True),
        SampleDescription('disk.cgroup.written', 'bytes', 'counter', instanced=True),
    ]

    @staticmethod
    def get_cgroup_name(fd: int) -> str:
        """Return the content of the 'cgroup' file in the process directory `fd`, minus its first four characters."""
        with Handle.open('cgroup', os.O_RDONLY, dir_fd=fd) as cgroup_fd:
            cgroup_name = os.read(cgroup_fd, 2048).decode().strip()

            # Skip leading 0::/
            return cgroup_name[4:]

    @staticmethod
    def get_proc_io(fd: int) -> Tuple[int, int]:
        """Return the (read_bytes, write_bytes) values from the 'io' file in the process directory `fd`.

        Returns (0, 0) if the file doesn't contain these fields.
        """
        with Handle.open('io', os.O_RDONLY, dir_fd=fd) as io_fd:
            data = os.read(io_fd, 4096)

            match = CGroupDiskIO.IO_RE.search(data)
            if match:
                proc_read = int(match.group('read'))
                proc_write = int(match.group('write'))

                return proc_read, proc_write

            return 0, 0

    def sample(self, samples: Samples) -> None:
        with Handle.open('/proc', os.O_RDONLY | os.O_DIRECTORY) as proc_fd:
            reads = samples['disk.cgroup.read']
            writes = samples['disk.cgroup.written']

            for path in os.listdir(proc_fd):
                # non-pid entries in proc are guaranteed to start with a character a-z
                if path[0] < '0' or path[0] > '9':
                    continue

                try:
                    with Handle.open(path, os.O_PATH, dir_fd=proc_fd) as pid_fd:
                        cgroup_name = self.get_cgroup_name(pid_fd)
                        proc_read, proc_write = self.get_proc_io(pid_fd)
                except (FileNotFoundError, PermissionError, ProcessLookupError):
                    # Skip processes which can't be opened or read
                    continue

                reads[cgroup_name] = reads.get(cgroup_name, 0) + proc_read
                writes[cgroup_name] = writes.get(cgroup_name, 0) + proc_write


class NetworkSampler(Sampler):
    """Samples the per-interface byte counters from /proc/net/dev."""
    descriptions = [
        SampleDescription('network.interface.tx', 'bytes', 'counter', instanced=True),
        SampleDescription('network.interface.rx', 'bytes', 'counter', instanced=True),
    ]

    def sample(self, samples: Samples) -> None:
        with open("/proc/net/dev") as network_samples:
            for line in network_samples:
                fields = line.split()

                # Skip header line
                if fields[0][-1] != ':':
                    continue

                # The first field is the interface name followed by a colon
                iface = fields[0][:-1]
                samples['network.interface.rx'][iface] = int(fields[1])
                samples['network.interface.tx'][iface] = int(fields[9])


class MountSampler(Sampler):
    """Samples total and used space of the mounted file systems listed in /proc/mounts."""
    descriptions = [
        SampleDescription('mount.total', 'bytes', 'instant', instanced=True),
        SampleDescription('mount.used', 'bytes', 'instant', instanced=True),
    ]

    def sample(self, samples: Samples) -> None:
        with open('/proc/mounts') as mounts:
            for line in mounts:
                # Only look at real devices
                if line[0] != '/':
                    continue

                path = line.split()[1]
                try:
                    res = os.statvfs(path)
                except OSError:
                    # Skip mount points which can't be queried
                    continue
                frsize = res.f_frsize
                total = frsize * res.f_blocks
                samples['mount.total'][path] = total
                samples['mount.used'][path] = total - frsize * res.f_bfree


class BlockSampler(Sampler):
    """Samples read/written byte counters for every entry in /proc/diskstats, without any filtering."""
    descriptions = [
        SampleDescription('block.device.read', 'bytes', 'counter', instanced=True),
        SampleDescription('block.device.written', 'bytes', 'counter', instanced=True),
    ]

    def sample(self, samples: Samples) -> None:
        with open('/proc/diskstats') as diskstats:
            for line in diskstats:
                # https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats
                [_, _, dev_name, _, _, sectors_read, _, _, _, sectors_written, *_] = line.strip().split()

                samples['block.device.read'][dev_name] = int(sectors_read) * 512
                samples['block.device.written'][dev_name] = int(sectors_written) * 512


SAMPLERS = [
    BlockSampler,
    CGroupSampler,
    CGroupDiskIO,
    CPUSampler,
    CPUTemperatureSampler,
    DiskSampler,
    MemorySampler,
    MountSampler,
    NetworkSampler,
]