Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/frontend/compute/health-check.tsx
1503 views
1
import { Button, Input, InputNumber, Radio, Space } from "antd";
2
import { useState } from "react";
3
import ShowError from "@cocalc/frontend/components/error";
4
import { useServer } from "./compute-server";
5
import { webapp_client } from "@cocalc/frontend/webapp-client";
6
import { capitalize, plural } from "@cocalc/util/misc";
7
import {
8
type HealthCheck as IHealthCheck,
9
HEALTH_CHECK_DEFAULTS,
10
HEALTH_CHECK_ACTIONS,
11
validatedHealthCheck,
12
ACTION_INFO,
13
} from "@cocalc/util/db-schema/compute-servers";
14
import { AutomaticShutdownCard } from "./automatic-shutdown";
15
import { setServerConfiguration } from "./api";
16
import { isEqual } from "lodash";
17
import { Icon } from "@cocalc/frontend/components";
18
19
/**
 * Editor for the health check configuration of a compute server.
 *
 * Rendered inside an AutomaticShutdownCard, it lets the user edit the bash
 * command, its timeout, the failure threshold, the interval between checks,
 * the initial delay and the action taken when the check fails. A "Test"
 * button runs the command once on the compute server and shows the response
 * as JSON.
 *
 * @param id - id of the compute server; used to look up the server, to run
 *   the test command on it and to save the configuration.
 * @param project_id - project used to look up the server and to run the test
 *   command.
 * @param help - when truthy, an explanatory paragraph is shown above the form.
 * @returns the card, or null while the server is not available.
 */
export function HealthCheck({ id, project_id, help }) {
  const server = useServer({ id, project_id });
  const [error, setError] = useState<string>("");
  const [test, setTest] = useState<string>("");
  const [saving, setSaving] = useState<boolean>(false);
  // NOTE(review): this initializer only runs on the first render, so if the
  // server is not available yet the form starts from HEALTH_CHECK_DEFAULTS
  // and is not re-synced afterwards -- confirm this is acceptable.
  const [healthCheck, setHealthCheck] = useState<Partial<IHealthCheck>>(
    validatedHealthCheck(
      server?.configuration?.healthCheck ?? HEALTH_CHECK_DEFAULTS,
    )!,
  );

  // The initial state above already allows for a missing server; everything
  // below dereferences it directly, so bail out here instead of throwing.
  // All hooks have been called at this point, so the early return is safe.
  if (server == null) {
    return null;
  }

  // Merge changes into the latest state rather than into the snapshot captured
  // by this render, so consecutive edits never overwrite each other.
  const update = (changes: Partial<IHealthCheck>) =>
    setHealthCheck((prev) => ({ ...prev, ...changes }));

  // Run the configured command once on the compute server and display the
  // raw response. err_on_exit is false so a non-zero exit code is reported in
  // the response instead of being thrown.
  const doTest = async () => {
    try {
      setSaving(true);
      // Clear output and any error left over from a previous attempt so stale
      // messages are not shown next to the new result.
      setTest("");
      setError("");
      const resp = await webapp_client.exec({
        filesystem: false,
        compute_server_id: id,
        project_id,
        command: healthCheck.command,
        bash: true,
        err_on_exit: false,
      });
      delete resp.type;
      setTest(JSON.stringify(resp, undefined, 2));
    } catch (err) {
      setError(`${err}`);
    } finally {
      setSaving(false);
    }
  };

  // Values shown in the UI fall back to the defaults when unset in the state.
  const periodSeconds =
    healthCheck.periodSeconds ?? HEALTH_CHECK_DEFAULTS.periodSeconds;
  const failureThreshold =
    healthCheck.failureThreshold ?? HEALTH_CHECK_DEFAULTS.failureThreshold;
  const initialDelaySeconds =
    healthCheck.initialDelaySeconds ??
    HEALTH_CHECK_DEFAULTS.initialDelaySeconds;
  // Resolve the action once so the help text, placeholder, addon label and
  // radio group all agree and never render "undefined".
  const action = healthCheck.action ?? HEALTH_CHECK_DEFAULTS.action;
  const isRunning = server.state == "running";

  return (
    <AutomaticShutdownCard
      title="Health Check"
      icon="medkit"
      setEnabled={(enabled) => update({ enabled })}
      save={async () => {
        await setServerConfiguration({
          id,
          configuration: {
            healthCheck: { ...HEALTH_CHECK_DEFAULTS, ...healthCheck },
          },
        });
      }}
      hasUnsavedChanges={
        !isEqual(
          validatedHealthCheck(
            server.configuration?.healthCheck ?? HEALTH_CHECK_DEFAULTS,
          ),
          validatedHealthCheck(healthCheck),
        ) &&
        healthCheck.failureThreshold != null &&
        healthCheck.periodSeconds != null &&
        healthCheck.timeoutSeconds != null &&
        healthCheck.initialDelaySeconds != null
      }
      savedEnabled={!!server.configuration?.healthCheck?.enabled}
      enabled={healthCheck.enabled}
      saving={saving}
      setSaving={setSaving}
      error={error}
      setError={setError}
      confirmSave={
        action == "deprovision"
          ? "Are you sure? This could automatically delete data."
          : undefined
      }
    >
      {help && (
        <div>
          <p style={{ marginBottom: "15px" }}>
            Run this bash command on your compute server (from the HOME
            directory) every {periodSeconds} {plural(periodSeconds, "second")}.
            If the command fails{" "}
            {`${failureThreshold} ${plural(failureThreshold, "time")} in a row`}
            , then {action} the compute server.
          </p>
          <ul>
            <li>
              NOTE: If you set the action to "Stop" instead of "Reboot" you can
              use this as an arbitrarily sophisticated way of automatically
              stopping your compute server. E.g., you can make a script that
              monitors GPU usage, then stop the compute server.
            </li>
          </ul>
        </div>
      )}
      <Space direction="vertical" size="large">
        <Space style={{ width: "100%" }} wrap>
          <Input
            style={{ width: "508px" }}
            allowClear
            disabled={saving}
            value={healthCheck.command}
            onChange={(e) => update({ command: e.target.value })}
            placeholder={`Shell Command (bash) -- ${action} when this fails ${failureThreshold} ${plural(failureThreshold, "time")}...`}
          />
          <InputNumber
            style={{ width: "250px" }}
            disabled={saving}
            min={1}
            step={1}
            value={healthCheck.timeoutSeconds}
            onChange={(timeoutSeconds) =>
              update({ timeoutSeconds: timeoutSeconds ?? undefined })
            }
            addonAfter="seconds timeout"
            placeholder="Command timeout..."
          />
        </Space>
        <Space style={{ width: "100%" }} wrap>
          <InputNumber
            style={{ width: "250px" }}
            disabled={saving}
            min={1}
            step={1}
            value={failureThreshold}
            onChange={(failureThreshold) =>
              update({ failureThreshold: failureThreshold ?? undefined })
            }
            addonAfter={`attempts before ${action}`}
            placeholder="Failure threshold..."
          />
          <InputNumber
            style={{ width: "250px" }}
            disabled={saving}
            min={60}
            step={30}
            value={periodSeconds}
            onChange={(periodSeconds) =>
              update({ periodSeconds: periodSeconds ?? undefined })
            }
            addonAfter="seconds between checks"
            placeholder="Interval..."
          />
          <InputNumber
            style={{ width: "250px" }}
            disabled={saving}
            min={60}
            step={30}
            value={initialDelaySeconds}
            onChange={(initialDelaySeconds) =>
              update({
                initialDelaySeconds: initialDelaySeconds ?? undefined,
              })
            }
            addonAfter="seconds initial delay"
            placeholder="Initial delay..."
          />
        </Space>
        <Space style={{ width: "100%" }}>
          <div style={{ marginRight: "15px" }}>
            Action when health check fails:
          </div>
          <Radio.Group
            style={{ flex: 1 }}
            disabled={saving}
            options={HEALTH_CHECK_ACTIONS.filter(
              // Actions without an isSupported predicate are always offered.
              (action) =>
                ACTION_INFO[action].isSupported?.(server.configuration) ?? true,
            ).map((action) => {
              return {
                label: (
                  <>
                    <Icon name={ACTION_INFO[action].icon as any} />{" "}
                    {capitalize(action)}
                  </>
                ),
                value: action,
              };
            })}
            optionType="button"
            buttonStyle="solid"
            value={action}
            onChange={(e) => {
              update({ action: e.target.value });
            }}
          />
        </Space>
        <div style={{ textAlign: "center" }}>
          <Button
            disabled={saving || !healthCheck.command?.trim() || !isRunning}
            onClick={doTest}
          >
            Test
            {!isRunning ? " (start server to test command)" : ""}
          </Button>
        </div>
        {test && (
          <pre
            style={{
              width: "550px",
              overflow: "auto",
              background: "#e8e8e8",
              padding: "15px",
              borderRadius: "15px",
            }}
          >
            {test}
          </pre>
        )}
        <ShowError
          error={error}
          setError={setError}
          style={{ width: "100%" }}
        />
      </Space>
    </AutomaticShutdownCard>
  );
}
253
254