// Book a Demo!
// CoCalc Logo Icon
// Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
// sagemathinc
// GitHub Repository: sagemathinc/cocalc
// Path: blob/master/src/packages/jupyter/ipynb/import-from-ipynb.ts
// 1447 views
1
/*
2
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
/*
7
Importing from an ipynb object (in-memory version of .ipynb file)
8
*/
9
10
import * as misc from "@cocalc/util/misc";
11
import { JUPYTER_MIMETYPES } from "@cocalc/jupyter/util/misc";
12
13
/**
 * Fallback values for a notebook object with missing pieces: a single
 * empty code cell, empty metadata slots and nbformat 4.4.  Consumers copy
 * individual fields out of this object; it is not meant to be mutated.
 */
const DEFAULT_IPYNB = {
  cells: [
    {
      cell_type: "code",
      execution_count: null,
      metadata: {},
      outputs: [],
      source: [],
    },
  ],
  metadata: {
    kernelspec: undefined,
    language_info: undefined,
  },
  nbformat: 4,
  nbformat_minor: 4,
};
30
31
/**
 * Imports an in-memory ipynb object (the parsed content of a .ipynb file)
 * and exposes the resulting cells, kernel name and notebook metadata.
 *
 * Typical use: call `import(opts)`, read `cells()`, `kernel()` and
 * `metadata()`, then call `close()` to drop all references.
 */
export class IPynbImporter {
  private _ipynb: any;
  private _new_id: any;
  private _output_handler: any;
  private _existing_ids: any;
  private _cells: any;
  private _kernel: any;
  private _metadata: any;
  private _language_info: any;

  /**
   * Run the import.  Recognized options (all optional):
   *  - ipynb: the notebook object; the importer works on a copy made with
   *    misc.deep_copy.
   *  - new_id: function returning an unused id given an is_available
   *    predicate, i.e. new_id(is_available) = a new id.
   *  - existing_ids: ids to re-use, indexed by cell position.
   *  - output_handler: h = output_handler(cell); h.message(content) is then
   *    called for every output and h.done() (if defined) at the end.
   */
  import = (opts: any) => {
    opts = misc.defaults(opts, {
      ipynb: {},
      new_id: undefined,
      existing_ids: [],
      output_handler: undefined,
    });

    this._ipynb = misc.deep_copy(opts.ipynb);
    this._new_id = opts.new_id;
    this._output_handler = opts.output_handler;
    this._existing_ids = opts.existing_ids; // option to re-use existing ids

    // Must come before the sanity checks, as old versions are "insane".
    // See https://github.com/sagemathinc/cocalc/issues/1937
    this._handle_old_versions();
    this._sanity_improvements();
    this._import_settings();
    this._import_metadata();
    this._read_in_cells();
  };

  /** Map from cell id to imported cell object (set by import). */
  cells = () => {
    return this._cells;
  };

  /** Lower-cased kernel name from metadata.kernelspec.name, if any. */
  kernel = () => {
    return this._kernel;
  };

  /** Notebook metadata without the kernelspec entry; unset when empty. */
  metadata = () => {
    return this._metadata;
  };

  /** Drop every reference held by this importer. */
  close = () => {
    delete this._cells;
    delete this._kernel;
    delete this._metadata;
    delete this._language_info;
    delete this._ipynb;
    delete this._existing_ids;
    delete this._new_id;
    delete this._output_handler;
  };

  // Everything below is the internal private implementation.

  // Do some basic easy sanity improvements to the ipynb object,
  // in case parts of the object are missing.
  private _sanity_improvements = () => {
    const ipynb = this._ipynb;
    if (ipynb.cells == null || ipynb.cells.length === 0) {
      ipynb.cells = misc.deep_copy(DEFAULT_IPYNB.cells);
    }
    if (ipynb.metadata == null) {
      ipynb.metadata = misc.deep_copy(DEFAULT_IPYNB.metadata);
    }
    if (ipynb.nbformat == null) {
      ipynb.nbformat = DEFAULT_IPYNB.nbformat;
    }
    if (ipynb.nbformat_minor == null) {
      ipynb.nbformat_minor = DEFAULT_IPYNB.nbformat_minor;
    }
  };

  // Update the ipynb object from formats before version 4.
  // There are other changes made when parsing cells.
  private _handle_old_versions = () => {
    const ipynb = this._ipynb;
    if (ipynb.nbformat >= 4) {
      return;
    }
    if (ipynb.cells == null) {
      ipynb.cells = [];
    }
    for (const worksheet of ipynb.worksheets || []) {
      for (const cell of worksheet.cells || []) {
        if (cell.input != null) {
          cell.source = cell.input;
          delete cell.input;
        }
        if (cell.cell_type === "heading") {
          cell.cell_type = "markdown";
          if (misc.is_array(cell.source)) {
            cell.source = cell.source.join("");
          }
          cell.source = `# ${cell.source}`;
        }
        for (const mesg of cell.outputs || []) {
          if (mesg.output_type !== "pyout") {
            continue;
          }
          // Prefer the first known mimetype whose short name (the part
          // after the "/") is a field of the message; otherwise fall back
          // to the plain "text" field.
          let data: any;
          for (const type of JUPYTER_MIMETYPES) {
            const short = type.split("/")[1];
            if (mesg[short] != null) {
              data = { [type]: mesg[short] };
              break;
            }
          }
          if (data == null && mesg.text != null) {
            // text may be either a string or a list of strings.
            const text = misc.is_array(mesg.text)
              ? mesg.text.join("")
              : mesg.text;
            data = { "text/plain": text };
          }
          if (data != null) {
            // The message is reduced to just its data.
            for (const k of Object.keys(mesg)) {
              delete mesg[k];
            }
            mesg.data = data;
          }
        }
        ipynb.cells.push(cell);
      }
    }
  };

  // Read the kernel name out of metadata.kernelspec.name.
  _import_settings = () => {
    this._kernel = this._ipynb?.metadata?.kernelspec?.name;
    if (typeof this._kernel === "string") {
      // kernel names are supposed to be case insensitive
      // https://jupyter-client.readthedocs.io/en/latest/kernels.html
      // We also make them all lower case when reading them in at
      // src/packages/jupyter/kernel/kernel-data.ts
      this._kernel = this._kernel.toLowerCase();
    }
  };

  // Copy all notebook metadata except kernelspec; only stored when the
  // result is non-empty.
  _import_metadata = () => {
    const m = this._ipynb?.metadata;
    if (m == null) {
      return;
    }
    const metadata: any = {};
    for (const k in m) {
      if (k !== "kernelspec") {
        metadata[k] = m[k];
      }
    }
    if (misc.len(metadata) > 0) {
      this._metadata = metadata;
    }
  };

  // Import every cell, keyed by its id; pos is the index in ipynb.cells.
  _read_in_cells = () => {
    this._cells = {};
    const cells = this._ipynb?.cells;
    if (cells == null) {
      // nothing to do
      return;
    }
    let n = 0;
    for (const raw of cells) {
      const cell = this._import_cell(raw, n);
      this._cells[cell.id] = cell;
      n += 1;
    }
  };

  // For notebooks older than nbformat 4, convert deprecated output fields.
  // Stream messages are updated in place; other messages may be returned
  // as a NEW object, so callers must use the return value.
  _update_output_format = (content: any) => {
    if (this._ipynb?.nbformat >= 4) {
      return content;
    }
    // fix old deprecated fields
    if (content.output_type === "stream") {
      if (misc.is_array(content.text)) {
        content.text = content.text.join("");
      }
      content.name = content.stream;
      return content;
    }
    for (const t of JUPYTER_MIMETYPES) {
      const short = t.split("/")[1];
      if (content[short] != null) {
        content = { data: { [t]: content[short] } };
        break; // at most one data per message.
      }
    }
    if (content.text != null) {
      content = {
        data: { "text/plain": content.text },
        output_type: "stream",
      };
    }
    return content;
  };

  // Replace every array-valued field of obj by the concatenation of its
  // entries; returns obj (which may be null/undefined).
  _join_array_strings_obj = (obj: any) => {
    if (obj != null) {
      for (const key in obj) {
        const val = obj[key];
        if (misc.is_array(val)) {
          obj[key] = val.join("");
        }
      }
    }
    return obj;
  };

  // Mutate content to be of the format we use internally
  _import_cell_output_content = (content: any): void => {
    const updated = this._update_output_format(content); // old versions
    if (updated !== content) {
      // The old-format conversion produced a new object.  Callers keep
      // their reference to `content`, so apply the conversion in place
      // instead of dropping it.
      for (const key of Object.keys(content)) {
        delete content[key];
      }
      Object.assign(content, updated);
    }
    this._join_array_strings_obj(content.data); // arrays --> strings
    if (misc.is_array(content.text)) {
      content.text = content.text.join("");
    }
    remove_redundant_reps(content.data); // multiple output formats
    delete content.prompt_number; // redundant; in some files
  };

  // True if id is used neither by an already imported cell nor by one of
  // the existing ids.
  _id_is_available = (id: any) => {
    if (this._cells != null && this._cells[id]) {
      return false;
    }
    return !(this._existing_ids ?? []).includes(id);
  };

  // Choose an id for cell: its own id when available, else whatever the
  // new_id callback returns, else the smallest available "0", "1", ...
  _get_new_id = (cell?: any) => {
    if (cell?.id && this._id_is_available(cell.id)) {
      // attempt to use id in the ipynb file
      return cell.id;
    }
    if (this._new_id != null) {
      return this._new_id(this._id_is_available);
    }
    for (let id = 0; ; id += 1) {
      const s = `${id}`;
      if (this._id_is_available(s)) {
        return s;
      }
    }
  };

  // execution_count wins over the older prompt_number; null if neither.
  _get_exec_count = (execution_count?: number, prompt_number?: number) => {
    return execution_count ?? prompt_number ?? null;
  };

  // Cells without a type are treated as code cells.
  _get_cell_type = (cell_type?: string) => {
    return cell_type ?? "code";
  };

  // Convert the outputs of one cell.  Entries of alt_outputs (same keys as
  // outputs) take precedence over the corresponding output.  With an output
  // handler, each message is passed to handler.message; otherwise it is
  // stored under its index.  Returns null when there are no outputs.
  _get_cell_output = (outputs: any, alt_outputs: any, id: any) => {
    if (outputs == null || outputs.length === 0) {
      return null;
    }
    const cell: any = { id, output: {} };
    const handler: any =
      this._output_handler != null ? this._output_handler(cell) : undefined;
    // Keys are the string indices "0", "1", ...; it's perfectly fine that
    // they are strings here.
    for (const k of Object.keys(outputs)) {
      const alt = alt_outputs != null ? alt_outputs[k] : undefined;
      const content = alt != null ? alt : outputs[k];
      this._import_cell_output_content(content);
      if (handler != null) {
        handler.message(content);
      } else {
        cell.output[k] = content;
      }
    }
    if (handler != null && typeof handler.done === "function") {
      handler.done();
    }
    return cell.output;
  };

  // "If you intend to work with notebook files directly, you must allow
  // multi-line string fields to be either a string or list of strings."
  // https://nbformat.readthedocs.io/en/latest/format_description.html#top-level-structure
  _get_cell_input(source) {
    if (source == null) {
      return null;
    }
    return misc.is_array(source) ? source.join("") : source;
  }

  // Build the internal representation of the n-th cell of the notebook.
  _import_cell(cell: any, n: any) {
    // Re-use the existing id for this position when there is one.
    const id = this._existing_ids?.[n] ?? this._get_new_id(cell);
    const obj: any = {
      type: "cell",
      id,
      pos: n,
      input: this._get_cell_input(cell.source),
      output: this._get_cell_output(
        cell.outputs,
        cell.metadata?.cocalc?.outputs,
        id,
      ),
      cell_type: this._get_cell_type(cell.cell_type),
      exec_count: this._get_exec_count(
        cell.execution_count,
        cell.prompt_number,
      ),
    };

    const meta = cell.metadata;
    if (meta != null) {
      // These flags are only recorded when set.
      for (const k of ["collapsed", "scrolled"]) {
        if (meta[k]) {
          obj[k] = true;
        }
      }

      if (meta.slideshow != null) {
        obj.slide = meta.slideshow.slide_type;
      }

      if (meta.tags != null) {
        obj.tags = misc.dict(meta.tags.map((tag) => [tag, true]));
      }
      // See https://github.com/sagemathinc/cocalc/issues/3191 for
      // why the _'d ones below; this is to fix "corrupted" worksheets.
      const other = misc.copy_without(meta, [
        "collapsed",
        "scrolled",
        "slideshow",
        "tags",
        "_root",
        "__ownerID",
        "__hash",
        "__altered",
      ]);
      if (misc.len(other) > 0) {
        obj.metadata = other;
      }
    }
    if (cell.attachments != null) {
      obj.attachments = {};
      for (const name in cell.attachments) {
        const val = cell.attachments[name];
        // If an attachment has several mime entries, the last one wins.
        for (const mime in val) {
          obj.attachments[name] = { type: "base64", value: val[mime] };
        }
      }
    }
    return obj;
  }
}
406
407
/**
 * Given the `data` object of an output message (mime type -> value), keep
 * only the first representation, in JUPYTER_MIMETYPES order, that is
 * present, and delete the other representations listed in
 * JUPYTER_MIMETYPES.  The object is modified in place.
 *
 * @param data - map from mime type to representation; may be null/undefined.
 * @returns the same (mutated) object, or undefined when data is null/undefined.
 */
export function remove_redundant_reps(data?: any) {
  if (data == null) {
    return;
  }
  // We only keep the first representation in types, since it provides the richest
  // representation in the client; there is no need for the others.
  // TODO: probably we should still store all of these types somewhere (in the
  // backend only) for the .ipynb export, but I'm not doing that right now!
  // This means opening and closing an ipynb file may lose information, which
  // no client currently cares about (?) -- maybe nbconvert does.
  let keep: string | undefined;
  for (const type of JUPYTER_MIMETYPES) {
    if (data[type] != null) {
      keep = type;
      break;
    }
  }
  if (keep == null) {
    return data;
  }
  // JUPYTER_MIMETYPES is a list of mime strings, so membership has to be
  // tested by value; indexing it with a mime string is always undefined,
  // which previously meant nothing was ever removed.
  const known = new Set<string>(JUPYTER_MIMETYPES);
  for (const type of Object.keys(data)) {
    // NOTE: we only remove multiple reps that are both in JUPYTER_MIMETYPES;
    // if there is another rep that is NOT in JUPYTER_MIMETYPES, then it is
    // not removed, e.g., application/vnd.jupyter.widget-view+json and
    // text/plain both are types of representation of a widget.
    if (type !== keep && known.has(type)) {
      delete data[type];
    }
  }
  return data;
}
437
438