Path: blob/master/src/packages/project/project-info/server.ts
1447 views
/*
 *  This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 *  License: MS-RSL – see LICENSE.md for details
 */

/*
Project information server, doing the heavy lifting of telling the client
about what's going on in a project.

This is an event emitter that emits a ProjectInfo object periodically when running.
*/

import { delay } from "awaiting";
import type { DiskUsage as DF_DiskUsage } from "diskusage";
import { check as df } from "diskusage";
import { EventEmitter } from "node:events";
import { readFile } from "node:fs/promises";
import { ProcessStats } from "@cocalc/backend/process-stats";
import { pidToPath as terminalPidToPath } from "@cocalc/project/conat/terminal/manager";
import type {
  CGroup,
  CoCalcInfo,
  DiskUsage,
  Process,
  Processes,
  ProjectInfo,
} from "@cocalc/util/types/project-info/types";
import { get_path_for_pid as x11_pid2path } from "../x11/server";
import { getLogger } from "../logger";

const L = getLogger("project-info:server").debug;

/** Convert a byte count to mebibytes (1 MiB = 1024 * 1024 bytes). */
const bytes2MiB = (bytes: number): number => bytes / (1024 * 1024);

/**
 * Periodically collects process, cgroup and disk usage information and
 * emits it as a ProjectInfo object via the "info" event.
 *
 * Call `start()` to begin the polling loop and `stop()` (or `close()`) to
 * end it. The most recently collected snapshot is available via `latest()`.
 */
export class ProjectInfoServer extends EventEmitter {
  // most recent successfully collected snapshot; reset when the loop stops
  private last?: ProjectInfo = undefined;
  private readonly dbg: Function;
  private running = false;
  private readonly testing: boolean;
  // pause between two updates, in seconds
  private delay_s: number;
  // set once a cgroup file turned out to be missing (ENOENT); see cgroup()
  private cgroupFilesAreMissing: boolean = false;
  // created in _start(), i.e. not available before start() was called
  private processStats: ProcessStats;

  /**
   * @param testing - if true, the loop in `start()` ends after one extra
   *   update, which is logged via the debug logger.
   */
  constructor(testing = false) {
    super();
    this.delay_s = 2;
    this.testing = testing;
    this.dbg = L;
  }

  private async processes(timestamp: number) {
    return await this.processStats.processes(timestamp);
  }

  // delta-time (in seconds) for this and the previous process information
  private dt(timestamp: number): number {
    return (timestamp - (this.last?.timestamp ?? 0)) / 1000;
  }

  /** @returns the most recent snapshot, or undefined if there is none. */
  public latest(): ProjectInfo | undefined {
    return this.last;
  }

  // for a process we know (pid, etc.) we try to map to cocalc specific information
  private async cocalc({
    pid,
    cmdline,
  }: Pick<Process, "pid" | "cmdline">): Promise<CoCalcInfo | undefined> {
    //this.dbg("classify", { pid, exe, cmdline });
    if (pid === process.pid) {
      return { type: "project" };
    }
    // SPEED: importing @cocalc/jupyter/kernel is slow, so it MUST NOT BE DONE
    // on the top level, especially not in any code that is loaded during
    // project startup
    const { get_kernel_by_pid } = await import("@cocalc/jupyter/kernel");
    const jupyter_kernel = get_kernel_by_pid(pid);
    if (jupyter_kernel != null) {
      return { type: "jupyter", path: jupyter_kernel.get_path() };
    }
    const termpath = terminalPidToPath(pid);
    if (termpath != null) {
      return { type: "terminal", path: termpath };
    }
    const x11_path = x11_pid2path(pid);
    if (x11_path != null) {
      return { type: "x11", path: x11_path };
    }
    // SSHD: strangely, just one long string in cmdline[0]
    if (
      cmdline.length === 1 &&
      cmdline[0].startsWith("sshd:") &&
      cmdline[0].includes("-p 2222")
    ) {
      return { type: "sshd" };
    }
  }

  private async lookupCoCalcInfo(processes: Processes) {
    // iterate over all processes keys (pid) and call this.cocalc({pid, cmdline})
    // to update the processes cocalc field
    for (const pid in processes) {
      processes[pid].cocalc = await this.cocalc({
        pid: parseInt(pid, 10),
        cmdline: processes[pid].cmdline,
      });
    }
  }

  // this is specific to running a project in a CGroup container
  // Harald: however, even without a container this shouldn't fail … just tells
  // you what the whole system is doing, all your processes.
  // William: it's constantly failing in cocalc-docker every second, so to avoid
  // clogging logs and wasting CPU, if the files are missing once, it stops updating.
  private async cgroup({
    timestamp,
  }: {
    timestamp: number;
  }): Promise<CGroup | undefined> {
    if (this.cgroupFilesAreMissing) {
      return;
    }
    try {
      const [mem_stat_raw, cpu_raw, oom_raw, cfs_quota_raw, cfs_period_raw] =
        await Promise.all([
          readFile("/sys/fs/cgroup/memory/memory.stat", "utf8"),
          readFile("/sys/fs/cgroup/cpu,cpuacct/cpuacct.usage", "utf8"),
          readFile("/sys/fs/cgroup/memory/memory.oom_control", "utf8"),
          readFile("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us", "utf8"),
          readFile("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us", "utf8"),
        ]);
      const mem_stat_keys = [
        "total_rss",
        "total_cache",
        "hierarchical_memory_limit",
      ];
      const cpu_usage = parseFloat(cpu_raw) / Math.pow(10, 9);
      const dt = this.dt(timestamp);
      const previous = this.last?.cgroup;
      // Only compute a rate for a positive time interval: dividing by a zero
      // (or negative) dt would yield Infinity/NaN or a wrong-signed rate.
      const cpu_usage_rate =
        previous != null && dt > 0 ? (cpu_usage - previous.cpu_usage) / dt : 0;
      const [cfs_quota, cfs_period] = [
        parseInt(cfs_quota_raw, 10),
        parseInt(cfs_period_raw, 10),
      ];
      // each line of memory.stat is "<key> <value>"; keep only the keys of
      // interest and convert their byte values to MiB
      const mem_stat = mem_stat_raw
        .split("\n")
        .map((line) => line.split(" "))
        .filter(([k, _]) => mem_stat_keys.includes(k))
        .reduce((stat, [key, val]) => {
          stat[key] = bytes2MiB(parseInt(val, 10));
          return stat;
        }, {});
      // first "oom_kill <n>" line; undefined if there is no such line
      const oom_kills = oom_raw
        .split("\n")
        .filter((val) => val.startsWith("oom_kill "))
        .map((val) => parseInt(val.slice("oom_kill ".length), 10))[0];
      return {
        mem_stat,
        cpu_usage,
        cpu_usage_rate,
        cpu_cores_limit: cfs_quota / cfs_period,
        oom_kills,
      };
    } catch (err) {
      this.dbg("cgroup: error", err);
      if (err.code == "ENOENT") {
        // TODO: instead of shutting this down, we could maybe do a better job
        // figuring out what the correct cgroups files are on a given system.
        // E.g., in my cocalc-docker, I do NOT have /sys/fs/cgroup/memory/memory.stat
        // but I do have /sys/fs/cgroup/memory.stat
        this.cgroupFilesAreMissing = true;
        this.dbg(
          "cgroup: files are missing so cgroups info will no longer be updated",
        );
      }
      return undefined;
    }
  }

  // for cocalc/kucalc we want to know the disk usage + limits of the
  // users home dir and /tmp. /tmp is a ram disk, which will count against
  // the overall memory limit!
  private async disk_usage(): Promise<DiskUsage> {
    const convert = function (val: DF_DiskUsage) {
      return {
        total: bytes2MiB(val.total),
        free: bytes2MiB(val.free),
        available: bytes2MiB(val.available),
        usage: bytes2MiB(val.total - val.free),
      };
    };
    const [tmp, project] = await Promise.all([
      df("/tmp"),
      df(process.env.HOME ?? "/home/user"),
    ]);
    return { tmp: convert(tmp), project: convert(project) };
  }

  // orchestrating where all the information is bundled up for an update;
  // errors are logged and result in undefined
  private async get_info(): Promise<ProjectInfo | undefined> {
    try {
      const timestamp = Date.now();
      const [processes, cgroup, disk_usage] = await Promise.all([
        this.processes(timestamp),
        this.cgroup({ timestamp }),
        this.disk_usage(),
      ]);
      const { procs, boottime, uptime } = processes;
      await this.lookupCoCalcInfo(procs);
      const info: ProjectInfo = {
        timestamp,
        processes: procs,
        uptime,
        boottime,
        cgroup,
        disk_usage,
      };
      return info;
    } catch (err) {
      this.dbg("get_info: error", err);
    }
  }

  /**
   * Request the update loop to stop. The loop checks this flag only after
   * collecting and emitting an update, so one more "info" event may follow.
   */
  public stop() {
    this.running = false;
  }

  /** Alias for `stop()`. */
  close = () => {
    this.stop();
  };

  /**
   * Start the update loop, unless it is already running. The returned
   * promise resolves only when the loop has ended.
   */
  public async start(): Promise<void> {
    if (this.running) {
      this.dbg("project-info/server: already running, cannot be started twice");
    } else {
      await this._start();
    }
  }

  private async _start(): Promise<void> {
    this.dbg("start");
    if (this.running) {
      throw Error("Cannot start ProjectInfoServer twice");
    }
    this.running = true;
    this.processStats = new ProcessStats({
      testing: this.testing,
      dbg: this.dbg,
    });
    await this.processStats.init();
    while (true) {
      //this.dbg(`listeners on 'info': ${this.listenerCount("info")}`);
      const info = await this.get_info();
      if (info != null) this.last = info;
      // if collecting failed, re-emit the previous snapshot (if any)
      this.emit("info", info ?? this.last);
      if (this.running) {
        await delay(1000 * this.delay_s);
      } else {
        this.dbg("start: no longer running → stopping loop");
        this.last = undefined;
        return;
      }
      // in test mode just one more, that's enough
      if (this.last != null && this.testing) {
        const info = await this.get_info();
        this.dbg(JSON.stringify(info, null, 2));
        return;
      }
    }
  }
}

// testing: $ ts-node server.ts
if (require.main === module) {
  const pis = new ProjectInfoServer(true);
  pis.start().then(() => process.exit());
}