Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/jupyter/nbgrader/jupyter-run.ts
1447 views
1
/*
 *  This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 *  License: MS-RSL – see LICENSE.md for details
 */
5
6
import {
7
type JupyterNotebook,
8
type RunNotebookOptions,
9
type Limits,
10
DEFAULT_LIMITS,
11
} from "@cocalc/util/jupyter/nbgrader-types";
12
import type { JupyterKernelInterface as JupyterKernel } from "@cocalc/jupyter/types/project-interface";
13
import { is_object, len, uuid, trunc_middle } from "@cocalc/util/misc";
14
import { retry_until_success } from "@cocalc/util/async-utils";
15
import { kernel } from "@cocalc/jupyter/kernel";
16
import getLogger from "@cocalc/backend/logger";
17
export type { Limits };
18
19
const logger = getLogger("jupyter:nbgrader:jupyter-run");
20
21
/**
 * Report whether the notebook-wide time budget has been used up.
 *
 * Returns false when no budget applies: `timeout_ms` is missing or 0
 * (run_cell in this file likewise treats a falsy `timeout_ms` as "no limit"),
 * or `start_time` has not been set.  Otherwise returns true once at least
 * `timeout_ms` milliseconds have elapsed since `start_time`.
 */
function global_timeout_exceeded(limits: Limits): boolean {
  // A falsy timeout must not count as "exceeded": with timeout_ms === 0 the
  // comparison below would be true for any elapsed time.
  if (!limits.timeout_ms || limits.start_time == null) return false;
  return Date.now() - limits.start_time >= limits.timeout_ms;
}
25
26
/**
 * Run every cell of a notebook on a freshly spawned kernel and return the
 * notebook, with the produced outputs, serialized as a JSON string.
 *
 * The notebook is parsed from `opts.ipynb`; the kernel name is read from
 * `notebook.metadata.kernelspec.name`.  Cells are executed one after another
 * via `run_cell`, which mutates each cell by filling in its outputs.  If
 * running a cell throws, the stringified error is appended to that cell's
 * outputs as a `traceback` entry and -- unless the total time limit has been
 * used up -- the kernel is closed and a new one is spawned so the remaining
 * cells can still run.  The kernel is always closed before returning.
 *
 * @param opts - `ipynb` (notebook JSON text), `path` (directory used to build
 *   a random notebook path for the kernel) and optional `limits`.
 * @returns the notebook, including outputs, as a JSON string.
 * @throws if `opts.ipynb` is not valid JSON, if the kernelspec name cannot be
 *   read, or if spawning/respawning the kernel rejects (presumably once
 *   retry_until_success gives up after `max_time` -- confirm against that
 *   helper).
 */
export async function jupyter_run_notebook(
  opts: RunNotebookOptions,
): Promise<string> {
  const log = (...args: unknown[]): void => {
    logger.debug("jupyter_run_notebook", ...args);
  };
  log(trunc_middle(JSON.stringify(opts)));
  const notebook: JupyterNotebook = JSON.parse(opts.ipynb);

  // Unspecified limits become 0.  run_cell treats 0 as "no limit" for the
  // total time and for both output limits; how execute_code_now treats a
  // per-cell timeout of 0 is not visible from this file.
  const limits: Limits = {
    timeout_ms: opts.limits?.max_total_time_ms ?? 0,
    timeout_ms_per_cell: opts.limits?.max_time_per_cell_ms ?? 0,
    max_output: opts.limits?.max_output ?? 0,
    max_output_per_cell: opts.limits?.max_output_per_cell ?? 0,
    start_time: Date.now(),
    total_output: 0,
  };

  const name = notebook.metadata.kernelspec.name;
  let jupyter: JupyterKernel | undefined = undefined;

  // Close the current kernel, if any.  Returns true if a kernel was closed.
  // Keeping this in a closure also lets TypeScript see the declared type of
  // `jupyter` instead of the narrowed one in the outer function body.
  function close_jupyter(): boolean {
    if (jupyter == null) return false;
    jupyter.close();
    jupyter = undefined;
    return true;
  }

  /* We use retry_until_success to spawn the kernel, since
     it makes people's lives much easier if this works even
     if there is a temporary issue.  Also, in testing, I've
     found that sometimes if you try to spawn two kernels at
     the exact same time as the same user things can fail.
     This is possibly an upstream Jupyter bug, but let's
     just work around it since we want extra reliability
     anyways.
  */
  async function init_jupyter0(): Promise<void> {
    log("init_jupyter", jupyter != null);
    close_jupyter();
    // path is random so it doesn't randomly conflict with
    // something else running at the same time.
    const path = opts.path + `/${uuid()}.ipynb`;
    jupyter = kernel({ name, path });
    log("init_jupyter: spawning");
    // for Python, we suppress all warnings
    // they end up as stderr-output and hence would imply 0 points
    const env = { PYTHONWARNINGS: "ignore" };
    await jupyter.spawn({ env });
    log("init_jupyter: spawned");
  }

  async function init_jupyter(): Promise<void> {
    await retry_until_success({
      f: init_jupyter0,
      start_delay: 1000,
      max_delay: 5000,
      factor: 1.4,
      max_time: 30000,
      log: (...args: unknown[]): void => {
        log("init_jupyter - retry_until_success", ...args);
      },
    });
  }

  try {
    log("init_jupyter...");
    await init_jupyter();
    log("init_jupyter: done");
    for (const cell of notebook.cells) {
      try {
        if (jupyter == null) {
          log("BUG: jupyter==null");
          throw Error("jupyter can't be null since it was initialized above");
        }
        log("run_cell...");
        await run_cell(jupyter, limits, cell); // mutates cell by putting in outputs
        log("run_cell: done");
      } catch (err) {
        // fatal error occurred, e.g., timeout, broken kernel, etc.
        if (cell.outputs == null) {
          cell.outputs = [];
        }
        cell.outputs.push({ traceback: [`${err}`] });
        // Close the existing kernel and spawn a new one, so we can robustly
        // run more cells.  Obviously, only do this if we are not out of time.
        // A total limit of 0 means "no limit" (as in run_cell), so it must
        // never be treated as exceeded here.
        if (!limits.timeout_ms || !global_timeout_exceeded(limits)) {
          log("total time limit not exceeded, so restarting kernel...");
          await init_jupyter();
          log("kernel restart done");
        }
      }
    }
  } finally {
    log("in finally");
    if (close_jupyter()) {
      log("jupyter != null so closed it");
    }
  }
  log("returning result");
  return JSON.stringify(notebook);
}
125
126
/**
 * Run one notebook cell on the given kernel and record its outputs in place.
 *
 * The given limits are merged over DEFAULT_LIMITS.  Non-code cells are
 * skipped.  For a code cell the source is executed with
 * `jupyter.execute_code_now` and the `content` of each returned message is
 * appended to `cell.outputs` after bookkeeping fields and empty-object values
 * are stripped.  Messages carrying a `comm_id` are ignored, and a
 * `clear_output` message resets `cell.outputs`.
 *
 * Output limits (a falsy limit disables the check): once the per-cell or the
 * global character count is exceeded, further non-traceback messages are
 * dropped and a single stdout notice is appended for the cell.  Messages with
 * a `traceback` are always kept.
 *
 * @param jupyter - kernel used to execute the code.
 * @param limits0 - limits for this run; `total_output` is updated in place so
 *   that the global output limit accumulates across successive calls that
 *   share the same object.
 * @param cell - notebook cell; mutated by filling in `outputs`.
 * @throws Error if `jupyter` is null or the total time limit is already
 *   exceeded; anything thrown by `execute_code_now` propagates.
 */
export async function run_cell(
  jupyter: JupyterKernel,
  limits0: Partial<Limits>,
  cell: any,
): Promise<void> {
  if (jupyter == null) {
    throw Error("jupyter must be defined");
  }
  const limits = { ...DEFAULT_LIMITS, ...limits0 };

  if (limits.timeout_ms && global_timeout_exceeded(limits)) {
    // the total time has been exceeded -- this will mark outputs as error
    // for each cell in the rest of the notebook.
    throw Error(
      `Total time limit (=${Math.round(
        limits.timeout_ms / 1000,
      )} seconds) exceeded`,
    );
  }

  if (cell.cell_type !== "code") {
    // skip all non-code cells -- nothing to run
    return;
  }
  // nbformat allows the source to be a list of strings or one plain string.
  const code: string = Array.isArray(cell.source)
    ? cell.source.join("")
    : `${cell.source ?? ""}`;
  if (cell.outputs == null) {
    // shouldn't happen, since this would violate nbformat, but let's ensure
    // it anyways, just in case.
    cell.outputs = [];
  }

  const result = await jupyter.execute_code_now({
    code,
    timeout_ms: limits.timeout_ms_per_cell,
  });

  let cell_output_chars = 0;
  // Each truncation notice is emitted at most once per cell (and again after
  // a clear_output wiped it), so the notices themselves cannot pile up.
  let cell_notice_shown = false;
  let global_notice_shown = false;

  for (const x of result) {
    if (x == null) continue;
    if (x["msg_type"] === "clear_output") {
      cell.outputs = [];
      cell_notice_shown = false;
      global_notice_shown = false;
    }
    const mesg: any = x["content"];
    if (mesg == null) continue;
    if (mesg.comm_id != null) {
      // ignore any comm/widget related messages
      continue;
    }
    delete mesg.execution_state;
    delete mesg.execution_count;
    delete mesg.payload;
    delete mesg.code;
    delete mesg.status;
    delete mesg.source;
    for (const k of Object.keys(mesg)) {
      const v = mesg[k];
      if (is_object(v) && len(v) === 0) {
        delete mesg[k];
      }
    }
    if (len(mesg) === 0) continue;

    const n = JSON.stringify(mesg).length;
    limits.total_output = (limits.total_output ?? 0) + n;
    // `limits` is a merged copy; write the running total back so the caller's
    // limits object carries it into the next cell.
    limits0.total_output = limits.total_output;
    if (limits.max_output_per_cell) {
      cell_output_chars += n;
    }

    if (mesg["traceback"] != null) {
      // always include tracebacks
      cell.outputs.push(mesg);
      continue;
    }

    const over_cell_limit =
      !!limits.max_output_per_cell &&
      cell_output_chars > limits.max_output_per_cell;
    const over_global_limit =
      !!limits.max_output && limits.total_output > limits.max_output;

    if (over_cell_limit) {
      if (!cell_notice_shown) {
        cell_notice_shown = true;
        // Use stdout stream -- it's not an *error* that there is
        // truncated output; just something we want to mention.
        cell.outputs.push({
          name: "stdout",
          output_type: "stream",
          text: [
            `Output truncated since it exceeded the cell output limit of ${limits.max_output_per_cell} characters`,
          ],
        });
      }
    } else if (over_global_limit) {
      if (!global_notice_shown) {
        global_notice_shown = true;
        cell.outputs.push({
          name: "stdout",
          output_type: "stream",
          text: [
            `Output truncated since it exceeded the global output limit of ${limits.max_output} characters`,
          ],
        });
      }
    } else {
      cell.outputs.push(mesg);
    }
  }
}
223
224