Path: blob/master/src/packages/frontend/compute/health-check.tsx
1503 views
import { Button, Input, InputNumber, Radio, Space } from "antd";
import { useState } from "react";
import ShowError from "@cocalc/frontend/components/error";
import { useServer } from "./compute-server";
import { webapp_client } from "@cocalc/frontend/webapp-client";
import { capitalize, plural } from "@cocalc/util/misc";
import {
  type HealthCheck as IHealthCheck,
  HEALTH_CHECK_DEFAULTS,
  HEALTH_CHECK_ACTIONS,
  validatedHealthCheck,
  ACTION_INFO,
} from "@cocalc/util/db-schema/compute-servers";
import { AutomaticShutdownCard } from "./automatic-shutdown";
import { setServerConfiguration } from "./api";
import { isEqual } from "lodash";
import { Icon } from "@cocalc/frontend/components";

/**
 * Configuration card for a compute server's periodic health check.
 *
 * Lets the user edit the shell command, its timeout, the failure threshold,
 * the interval between checks, the initial delay, and the action taken when
 * the check fails.  The command can also be run once against the server via
 * the "Test" button, and the raw response is shown below the form.
 *
 * @param id - compute server id; used for the test command and when saving.
 * @param project_id - project that the compute server belongs to.
 * @param help - when truthy, an explanatory paragraph is rendered above the form.
 */
export function HealthCheck({ id, project_id, help }) {
  const server = useServer({ id, project_id });
  const [error, setError] = useState<string>("");
  const [test, setTest] = useState<string>("");
  const [saving, setSaving] = useState<boolean>(false);
  // Local, possibly incomplete, draft of the configuration being edited.
  // It is seeded once from the server's saved configuration (or the defaults).
  const [healthCheck, setHealthCheck] = useState<Partial<IHealthCheck>>(
    validatedHealthCheck(
      server?.configuration?.healthCheck ?? HEALTH_CHECK_DEFAULTS,
    )!,
  );

  // Run the configured command once on the compute server and display the
  // JSON response.  `saving` is reused to disable the inputs while it runs.
  const doTest = async () => {
    try {
      setSaving(true);
      setTest("");
      // Drop any error left over from a previous run so it is not shown
      // next to the result of this one.
      setError("");
      const resp = await webapp_client.exec({
        filesystem: false,
        compute_server_id: id,
        project_id,
        command: healthCheck.command,
        bash: true,
        // A failing command is a valid test outcome, not an exception.
        err_on_exit: false,
      });
      delete resp.type;
      setTest(JSON.stringify(resp, undefined, 2));
    } catch (err) {
      setError(`${err}`);
    } finally {
      setSaving(false);
    }
  };

  // The draft is a Partial<>, so every value that is displayed falls back to
  // the corresponding default.
  const periodSeconds =
    healthCheck.periodSeconds ?? HEALTH_CHECK_DEFAULTS.periodSeconds;
  const failureThreshold =
    healthCheck.failureThreshold ?? HEALTH_CHECK_DEFAULTS.failureThreshold;
  const initialDelaySeconds =
    healthCheck.initialDelaySeconds ??
    HEALTH_CHECK_DEFAULTS.initialDelaySeconds;
  const selectedAction = healthCheck.action ?? HEALTH_CHECK_DEFAULTS.action;

  // The state initializer above already treats `server` as possibly missing;
  // everything below dereferences it, so render nothing until it exists.
  // (All hooks are called above this point, so the early return is safe.)
  if (server == null) {
    return null;
  }

  return (
    <AutomaticShutdownCard
      title="Health Check"
      icon="medkit"
      setEnabled={(enabled) => setHealthCheck({ ...healthCheck, enabled })}
      save={async () => {
        await setServerConfiguration({
          id,
          configuration: {
            healthCheck: { ...HEALTH_CHECK_DEFAULTS, ...healthCheck },
          },
        });
      }}
      hasUnsavedChanges={
        // Only report changes when the draft differs from what is saved AND
        // none of the numeric fields has been cleared.
        !isEqual(
          validatedHealthCheck(
            server.configuration?.healthCheck ?? HEALTH_CHECK_DEFAULTS,
          ),
          validatedHealthCheck(healthCheck),
        ) &&
        healthCheck.failureThreshold != null &&
        healthCheck.periodSeconds != null &&
        healthCheck.timeoutSeconds != null &&
        healthCheck.initialDelaySeconds != null
      }
      savedEnabled={!!server.configuration?.healthCheck?.enabled}
      enabled={healthCheck.enabled}
      saving={saving}
      setSaving={setSaving}
      error={error}
      setError={setError}
      confirmSave={
        selectedAction === "deprovision"
          ? "Are you sure? This could automatically delete data."
          : undefined
      }
    >
      {help && (
        <div>
          <p style={{ marginBottom: "15px" }}>
            Run this bash command on your compute server (from the HOME
            directory) every {periodSeconds} {plural(periodSeconds, "second")}.
            If the command fails{" "}
            {`${failureThreshold} ${plural(failureThreshold, "time")} in a row`}
            , then {selectedAction} the compute server.
          </p>
          <ul>
            <li>
              NOTE: If you set the action to "Stop" instead of "Reboot" you can
              use this as an arbitrarily sophisticated way of automatically
              stopping your compute server. E.g., you can make a script that
              monitors GPU usage, then stop the compute server.
            </li>
          </ul>
        </div>
      )}
      <Space direction="vertical" size="large">
        <Space style={{ width: "100%" }} wrap>
          <Input
            style={{ width: "508px" }}
            allowClear
            disabled={saving}
            value={healthCheck.command}
            onChange={(e) =>
              setHealthCheck({ ...healthCheck, command: e.target.value })
            }
            placeholder={`Shell Command (bash) -- ${selectedAction} when this fails ${failureThreshold} ${plural(failureThreshold, "time")}...`}
          />
          <InputNumber
            style={{ width: "250px" }}
            disabled={saving}
            min={1}
            step={1}
            value={healthCheck.timeoutSeconds}
            onChange={(timeoutSeconds) =>
              setHealthCheck({
                ...healthCheck,
                // the callback may deliver null; the draft stores undefined
                timeoutSeconds: timeoutSeconds ?? undefined,
              })
            }
            addonAfter="seconds timeout"
            placeholder="Command timeout..."
          />
        </Space>
        <Space style={{ width: "100%" }} wrap>
          <InputNumber
            style={{ width: "250px" }}
            disabled={saving}
            min={1}
            step={1}
            value={failureThreshold}
            onChange={(failureThreshold) =>
              setHealthCheck({
                ...healthCheck,
                failureThreshold: failureThreshold ?? undefined,
              })
            }
            addonAfter={`attempts before ${selectedAction}`}
            placeholder="Failure threshold..."
          />
          <InputNumber
            style={{ width: "250px" }}
            disabled={saving}
            min={60}
            step={30}
            value={periodSeconds}
            onChange={(periodSeconds) =>
              setHealthCheck({
                ...healthCheck,
                periodSeconds: periodSeconds ?? undefined,
              })
            }
            addonAfter="seconds between checks"
            placeholder="Interval..."
          />
          <InputNumber
            style={{ width: "250px" }}
            disabled={saving}
            min={60}
            step={30}
            value={initialDelaySeconds}
            onChange={(initialDelaySeconds) =>
              setHealthCheck({
                ...healthCheck,
                initialDelaySeconds: initialDelaySeconds ?? undefined,
              })
            }
            addonAfter="seconds initial delay"
            placeholder="Initial delay..."
          />
        </Space>
        <Space style={{ width: "100%" }}>
          <div style={{ marginRight: "15px" }}>
            Action when health check fails:
          </div>
          <Radio.Group
            style={{ flex: 1 }}
            disabled={saving}
            options={HEALTH_CHECK_ACTIONS.filter(
              // an action without an isSupported hook is always offered
              (action) =>
                ACTION_INFO[action].isSupported?.(server.configuration) ?? true,
            ).map((action) => {
              return {
                label: (
                  <>
                    <Icon name={ACTION_INFO[action].icon as any} />{" "}
                    {capitalize(action)}
                  </>
                ),
                value: action,
              };
            })}
            optionType="button"
            buttonStyle="solid"
            value={selectedAction}
            onChange={(e) => {
              setHealthCheck({ ...healthCheck, action: e.target.value });
            }}
          />
        </Space>
        <div style={{ textAlign: "center" }}>
          <Button
            disabled={
              saving ||
              !healthCheck.command?.trim() ||
              server.state != "running"
            }
            onClick={doTest}
          >
            Test
            {server.state != "running" ? " (start server to test command)" : ""}
          </Button>
        </div>
        {test && (
          <pre
            style={{
              width: "550px",
              overflow: "auto",
              background: "#e8e8e8",
              padding: "15px",
              borderRadius: "15px",
            }}
          >
            {test}
          </pre>
        )}
        <ShowError
          error={error}
          setError={setError}
          style={{ width: "100%" }}
        />
      </Space>
    </AutomaticShutdownCard>
  );
}