Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/jupyter/ipynb/export-to-ipynb.ts
1447 views
1
/*
2
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
/*
7
Exporting from our in-memory sync-friendly format to ipynb
8
*/
9
10
import { deep_copy, keys, filename_extension } from "@cocalc/util/misc";
11
12
// The values allowed for a cell's cell_type field.
type CellType = "code" | "markdown" | "raw";
13
14
// Cell tags stored as a map keyed by tag name; processTags exports the keys as a sorted list.
type Tags = { [key: string]: boolean };
15
16
// In-memory representation of a single notebook cell, as consumed by cell_to_ipynb.
interface Cell {
  // exported as "code" when missing
  cell_type?: CellType;
  // source text of the cell; exported as the "source" field
  input?: string;
  collapsed?: boolean;
  scrolled?: boolean;
  // exported as metadata.slideshow.slide_type
  slide?;
  // map name --> { type, value }; only entries with type "base64" are exported
  attachments?;
  tags?: Tags;
  // output messages keyed by "0", "1", ...
  output?: { [n: string]: OutputMessage };
  // extra metadata that is copied into the exported cell's metadata
  metadata?: Metadata;
  // exported as execution_count of code cells (0 when missing)
  exec_count?: number;
}
28
29
// A single output message of a code cell; left untyped (any) in this file.
type OutputMessage = any;
30
31
// Cell-level metadata of an exported ipynb cell.
interface Metadata {
  collapsed?: boolean;
  scrolled?: boolean;
  // CoCalc-specific data that has no place in the output messages themselves;
  // outputs maps an output index to the original CoCalc version of that message.
  cocalc?: {
    outputs: { [n: string]: any };
  };
  // set by processSlides to { slide_type: ... }
  slideshow?;
  // sorted list of tag names
  tags?: string[];
}
40
41
// One cell in the exported ipynb object, as produced by cell_to_ipynb.
export interface IPynbCell {
  id: string;
  cell_type: CellType;
  // source text split into lines (see diff_friendly)
  source?: string[];
  metadata?: Metadata;
  // only set for code cells
  execution_count?: number;
  // only set for code cells
  outputs?: OutputMessage[];
}
49
50
// Lookup of blob contents by sha1. A nullish result means the blob is not available.
interface BlobStore {
  // used for image/* and application/pdf output data
  getBase64: (sha1: string) => string | null | undefined | void;
  // used for iframe output data, which is exported as text/html
  getString: (sha1: string) => string | null | undefined | void;
}
54
55
// Input to export_to_ipynb.
interface Options {
  // list of ids of the cells, in the correct order
  cell_list: string[];
  // actual data of the cells
  cells: { [id: string]: Cell };
  // custom metadata only
  metadata?;
  // official jupyter will give an error on load without properly giving this (and ask to select a kernel)
  kernelspec?: object;
  language_info?: object;
  // needed to export image/*, application/pdf and iframe output data
  blob_store?: BlobStore;
  // optional map id --> list of additional output messages to replace last output message.
  more_output?: { [id: string]: OutputMessage[] };
}
69
70
/*
Convert the in-memory representation of a notebook to an ipynb object
(nbformat 4, nbformat_minor 4).

**WARNING: the cell data in opts.cells may be MUTATED!**  (Output messages are
rewritten in place while they are converted.)  opts itself and the
opts.metadata object are not modified: the returned metadata is a fresh object.
*/
export function export_to_ipynb(opts: Options) {
  // Build a new metadata object instead of writing kernelspec/language_info
  // into the caller's object.  Keys keep the same order as before: the custom
  // metadata first, then kernelspec, then language_info.
  const metadata = {
    ...opts.metadata,
    // official Jupyter needs a kernelspec entry, so default to an empty one
    kernelspec: opts.kernelspec ?? {},
  };
  if (opts.language_info != null) {
    metadata.language_info = opts.language_info;
  }

  return {
    cells: opts.cell_list.map((id: string) => cell_to_ipynb(id, opts)),
    metadata,
    nbformat: 4,
    nbformat_minor: 4,
  };
}
89
90
// Return ipynb version of the cell with the given id as an object.
// NOTE: the output messages of the cell (opts.cells[id].output) may be
// modified in place while they are converted.
function cell_to_ipynb(id: string, opts: Options) {
  const cell = opts.cells[id];
  const metadata: Metadata = {};
  const obj = {
    id,
    cell_type: cell.cell_type ?? "code",
    source: diff_friendly(cell.input ?? ""),
    metadata,
  } as IPynbCell;

  // Handle any extra metadata (mostly user defined) that we don't
  // handle in a special way for efficiency reasons.  This goes first, so
  // the explicitly handled fields below take precedence.
  if (cell.metadata != null) {
    processOtherMetadata(obj, cell.metadata);
  }

  // consistency with jupyter -- they explicitly give collapsed true or false state no matter what
  metadata.collapsed = !!cell.collapsed;

  // Jupyter only gives scrolled state when true.
  if (cell.scrolled) {
    metadata.scrolled = true;
  }

  const isCode = obj.cell_type === "code";
  const exec_count = cell.exec_count ?? 0;
  if (isCode) {
    obj.execution_count = exec_count;
  }

  processSlides(obj, cell.slide);
  processAttachments(obj, cell.attachments);
  processTags(obj, cell.tags);

  if (!isCode) {
    // Code is the only cell type that is allowed to have an outputs field.
    return obj;
  }

  // A code cell must always have an outputs field (annoying requirement of
  // the ipynb file format), so fall back to an empty list.
  const outputs: OutputMessage[] =
    cell.output != null
      ? ipynbOutputs({
          output: cell.output,
          exec_count,
          more_output: opts.more_output?.[id],
          blob_store: opts.blob_store,
        })
      : [];
  obj.outputs = outputs;

  outputs.forEach((mesg, n) => {
    if (mesg.cocalc == null) {
      return;
    }
    // alternative version of cell that official Jupyter doesn't support can only
    // be stored in the **cell-level** metadata, not output.
    if (metadata.cocalc == null) {
      metadata.cocalc = { outputs: {} };
    }
    metadata.cocalc.outputs[n] = mesg.cocalc;
    delete mesg.cocalc;
  });
  return obj;
}
155
156
// Record the slide type of a cell (if any) as metadata.slideshow.slide_type.
// obj.metadata must already exist.
function processSlides(obj, slide?) {
  if (slide == null) {
    return;
  }
  obj.metadata.slideshow = { slide_type: slide };
}
161
162
// Export the tags of a cell (if any) as metadata.tags.
// obj.metadata must already exist.
function processTags(obj, tags?: Tags) {
  if (tags == null) {
    return;
  }
  // we store tags internally as a map (for easy
  // efficient add/remove), but .ipynb uses a list.
  // Sorting keeps the exported list independent of insertion order.
  obj.metadata.tags = keys(tags).sort();
}
169
170
// Copy all fields of other_metadata (if given) into obj.metadata,
// overwriting fields that are already there.
function processOtherMetadata(obj, other_metadata) {
  if (other_metadata == null) {
    return;
  }
  Object.assign(obj.metadata, other_metadata);
}
175
176
// Export the attachments of a cell as obj.attachments, which maps each
// attachment name to { "image/<ext>": <value> }.
function processAttachments(obj, attachments) {
  if (attachments == null) {
    // don't have to or can't do anything (https://github.com/sagemathinc/cocalc/issues/4272)
    return;
  }
  obj.attachments = {};
  for (const name in attachments) {
    const val = attachments[name];
    if (val?.type !== "base64") {
      // We only handle base64 for now.  Skip just this attachment; a plain
      // `return` here would silently drop every attachment after it as well.
      continue;
    }
    let ext = filename_extension(name);
    if (ext === "jpg") {
      // the mime type is image/jpeg, not image/jpg
      ext = "jpeg";
    }
    obj.attachments[name] = { [`image/${ext}`]: val.value };
  }
}
195
196
// Convert the output messages of one code cell (a map keyed by "0", "1", ...)
// to the list of outputs stored in the ipynb file.
// NOTE: both the output map and the messages in it are modified in place.
function ipynbOutputs({
  output,
  exec_count,
  more_output,
  blob_store,
}: {
  output: { [n: string]: OutputMessage };
  exec_count: number;
  more_output?: OutputMessage[];
  blob_store?: BlobStore;
}) {
  // If the last message has the more_output field, then there may be
  // more output messages stored, which are not in the cells object.
  // There is no last message when output is empty, so guard against that
  // instead of dereferencing output["-1"].
  const len = objArrayLength(output);
  const last = len > 0 ? output[`${len - 1}`] : undefined;
  if (last?.more_output != null) {
    let n = len - 1;
    if (more_output == null || more_output.length === 0) {
      // For some reason more output is not available for this cell. So we replace
      // the more_output message by an error explaining what happened.
      output[`${n}`] = {
        text: "WARNING: Some output was deleted.\n",
        name: "stderr",
      };
    } else {
      // Before converting to ipynb, we remove that last message...
      delete output[`${n}`];
      // Then we put in the known more output, starting at the same index.
      for (const mesg of more_output) {
        output[`${n}`] = mesg;
        n += 1;
      }
    }
  }

  // Now, everything continues as normal.  The length is recomputed, since
  // the more_output messages may have been appended above.
  const outputs: OutputMessage[] = [];
  const total = objArrayLength(output);
  for (let n = 0; n < total; n++) {
    const output_n = output[`${n}`];
    if (output_n != null) {
      processOutputN(output_n, exec_count, blob_store);
      outputs.push(output_n);
    }
  }
  return outputs;
}
246
247
// Length of an "array" stored as an object with keys "0", "1", ...:
// one more than the largest integer key, or 0 if there is no such key
// (or objArray is null/undefined).
function objArrayLength(objArray) {
  if (objArray == null) {
    return 0;
  }
  let largest = -1;
  for (const key in objArray) {
    // explicit radix, so keys are always read as decimal numbers
    const j = parseInt(key, 10);
    // NaN (non-numeric key) never compares greater, so such keys are ignored
    if (j > largest) {
      largest = j;
    }
  }
  return largest + 1;
}
260
261
// Convert a single output message to its ipynb form, IN PLACE.
//
// - messages with a data field become "execute_result" outputs,
// - messages with a name field become "stream" outputs,
// - messages with an ename field become "error" outputs.
//
// NOTE: if the data of a message refers to a blob that can't be resolved
// (no blob_store, or the blob_store returns null/undefined), this returns
// early and leaves the message without output_type.
function processOutputN(
  output_n: OutputMessage,
  exec_count: number,
  blob_store?: BlobStore,
) {
  if (output_n == null) {
    return;
  }
  if (output_n.exec_count != null) {
    // ipynb stores the count as execution_count (set below when applicable)
    delete output_n.exec_count;
  }
  if (output_n.text != null) {
    output_n.text = diff_friendly(output_n.text);
  }

  if (output_n.data != null) {
    for (const k in output_n.data) {
      const v = output_n.data[k];
      if (k.startsWith("text/")) {
        output_n.data[k] = diff_friendly(v);
      }
      const isBlob =
        k.startsWith("image/") || k === "application/pdf" || k === "iframe";
      if (!isBlob) {
        continue;
      }
      if (blob_store == null) {
        return; // impossible to include in the output without blob_store
      }
      // In these cases v is the sha1 under which the content is stored.
      let target = k;
      let value;
      if (k === "iframe") {
        // an iframe is exported as plain html
        delete output_n.data[k];
        target = "text/html";
        value = blob_store.getString(v);
      } else {
        value = blob_store.getBase64(v);
      }
      if (value == null) {
        // The image is no longer known; this could happen if the user reverts in the history
        // browser and there is an image in the output that was not saved in the latest version.
        // TODO: instead return an error.
        return;
      }
      output_n.data[target] = value;
    }
    output_n.output_type = "execute_result";
    if (output_n.metadata == null) {
      output_n.metadata = {};
    }
    output_n.execution_count = exec_count;
  } else if (output_n.name != null) {
    output_n.output_type = "stream";
    if (output_n.name === "input") {
      // stdin is not a stream that ipynb knows about
      processStdinOutput(output_n);
    }
  } else if (output_n.ename != null) {
    output_n.output_type = "error";
  }
}
317
318
// Convert a stdin message (name "input") IN PLACE into a stdout stream
// message whose text is the prompt followed by the entered value.
// A copy of the original message is kept in output.cocalc.
function processStdinOutput(output) {
  // copy first, before any of the fields below get changed or deleted
  output.cocalc = deep_copy(output);
  output.name = "stdout";
  // Don't crash or write "undefined" into the notebook if the prompt is missing.
  output.text = (output.opts?.prompt ?? "") + " " + (output.value ?? "");
  delete output.opts;
  delete output.value;
}
325
326
// Transform a string s with newlines into an array v of strings
// such that v.join('') == s.  Every element except possibly the last
// ends with a newline, and the empty string gives the empty array.
// Anything that is not a string is returned unchanged, except that
// null is turned into undefined.
function diff_friendly(
  s: string | string[] | undefined | null,
): string[] | undefined | null {
  if (typeof s !== "string") {
    // might already be an array or undefined.
    return s ?? undefined;
  }
  const pieces = s.split("\n");
  // split always gives at least one piece; the last one is whatever comes
  // after the final newline (empty if s is empty or ends with a newline).
  const tail = pieces.pop();
  const lines = pieces.map((line) => line + "\n");
  if (tail) {
    lines.push(tail);
  }
  return lines;
}
347
348