Path: blob/master/src/packages/jupyter/ipynb/export-to-ipynb.ts
1447 views
/*
 *  This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 *  License: MS-RSL – see LICENSE.md for details
 */

/*
Exporting from our in-memory sync-friendly format to ipynb
*/

import { deep_copy, keys, filename_extension } from "@cocalc/util/misc";

type CellType = "code" | "markdown" | "raw";

// Tags are stored internally as a map tag --> true; see processTags.
type Tags = { [key: string]: boolean };

// A cell in the in-memory format that gets exported.
interface Cell {
  cell_type?: CellType;
  input?: string;
  collapsed?: boolean;
  scrolled?: boolean;
  slide?;
  attachments?;
  tags?: Tags;
  // output messages keyed by "0", "1", "2", ... (see objArrayLength)
  output?: { [n: string]: OutputMessage };
  metadata?: Metadata;
  exec_count?: number;
}

type OutputMessage = any;

interface Metadata {
  collapsed?: boolean;
  scrolled?: boolean;
  cocalc?: {
    outputs: { [n: string]: any };
  };
  slideshow?;
  tags?: string[];
}

// A cell in the shape that is written to the ipynb file.
export interface IPynbCell {
  id: string;
  cell_type: CellType;
  source?: string[];
  metadata?: Metadata;
  execution_count?: number;
  outputs?: OutputMessage[];
}

interface BlobStore {
  getBase64: (sha1: string) => string | null | undefined | void;
  getString: (sha1: string) => string | null | undefined | void;
}

interface Options {
  // list of id's of the cells in the correct order
  cell_list: string[];
  // actual data of the cells
  cells: { [id: string]: Cell };
  // custom metadata only
  metadata?;
  // official jupyter will give an error on load without properly giving this (and ask to select a kernel)
  kernelspec?: object;
  language_info?: object;
  blob_store?: BlobStore;
  // optional map id --> list of additional output messages to replace last output message.
  more_output?: { [id: string]: OutputMessage[] };
}

/**
 * Build an ipynb (nbformat 4.4) object from the in-memory notebook data.
 *
 * **WARNING: any input to export_to_ipynb function may be MUTATED!**
 * In particular opts.kernelspec is defaulted in place, opts.metadata (when
 * given) is used directly as the notebook metadata and gets kernelspec and
 * language_info assigned onto it, and the cells' output messages are
 * rewritten in place.
 *
 * @param opts - the cells, their order, and notebook-level metadata
 * @returns object with cells, metadata, nbformat and nbformat_minor
 */
export function export_to_ipynb(opts: Options) {
  if (opts.kernelspec == null) {
    opts.kernelspec = {};
  }
  const ipynb = {
    cells: opts.cell_list.map((id: string) => cell_to_ipynb(id, opts)),
    metadata: opts.metadata ?? {},
    nbformat: 4,
    nbformat_minor: 4,
  };

  ipynb.metadata.kernelspec = opts.kernelspec;
  if (opts.language_info != null) {
    ipynb.metadata.language_info = opts.language_info;
  }

  return ipynb;
}

// Return ipynb version of the given cell as object
function cell_to_ipynb(id: string, opts: Options) {
  const cell = opts.cells[id];
  const metadata: Metadata = {};
  const obj = {
    id,
    cell_type: cell.cell_type ?? "code",
    source: diff_friendly(cell.input ?? ""),
    metadata,
  } as IPynbCell;

  // Handle any extra metadata (mostly user defined) that we don't
  // handle in a special way for efficiency reasons.
  // This runs first, so the specially handled fields set below
  // (collapsed, scrolled, slideshow, tags) take precedence over it.
  const other_metadata = cell.metadata;
  if (other_metadata != null) {
    processOtherMetadata(obj, other_metadata);
  }

  // consistency with jupyter -- they explicitly give collapsed true or false state no matter what
  metadata.collapsed = !!cell.collapsed;

  // Jupyter only gives scrolled state when true.
  if (cell.scrolled) {
    metadata.scrolled = true;
  }

  const exec_count = cell.exec_count ?? 0;
  if (obj.cell_type === "code") {
    obj.execution_count = exec_count;
  }

  processSlides(obj, cell.slide);
  processAttachments(obj, cell.attachments);
  processTags(obj, cell.tags);

  if (obj.cell_type !== "code") {
    // Code is the only cell type that is allowed to have an outputs field.
    return obj;
  }

  const output = cell.output;
  if (output != null) {
    obj.outputs = ipynbOutputs({
      output,
      exec_count,
      more_output: opts.more_output?.[id],
      blob_store: opts.blob_store,
    });
  } else if (obj.outputs == null && obj.cell_type === "code") {
    obj.outputs = []; // annoying requirement of ipynb file format.
  }
  for (const n in obj.outputs) {
    const x = obj.outputs[n];
    if (x.cocalc != null) {
      // alternative version of cell that official Jupyter doesn't support can only
      // stored in the **cell-level** metadata, not output.
      if (metadata.cocalc == null) {
        metadata.cocalc = { outputs: {} };
      }
      metadata.cocalc.outputs[n] = x.cocalc;
      delete x.cocalc;
    }
  }
  return obj;
}

// Record the slide type (if any) as metadata.slideshow.slide_type.
function processSlides(obj, slide?) {
  if (slide != null) {
    obj.metadata.slideshow = { slide_type: slide };
  }
}

// Record the tags (if any) as a sorted list in metadata.tags.
function processTags(obj, tags?: Tags) {
  if (tags != null) {
    // we store tags internally as a map (for easy
    // efficient add/remove), but .ipynb uses a list.
    obj.metadata.tags = keys(tags).sort();
  }
}

// Shallow-copy all fields of other_metadata onto the cell's metadata.
function processOtherMetadata(obj, other_metadata) {
  if (other_metadata != null) {
    Object.assign(obj.metadata, other_metadata);
  }
}

// Copy base64 attachments onto obj.attachments as
// name --> { "image/<ext>": value }, where <ext> is the file extension
// of the attachment name (with "jpg" normalized to "jpeg").
function processAttachments(obj, attachments) {
  if (attachments == null) {
    // don't have to or can't do anything (https://github.com/sagemathinc/cocalc/issues/4272)
    return;
  }
  obj.attachments = {};
  for (const name in attachments) {
    const val = attachments[name];
    if (val?.type !== "base64") {
      // We only handle base64 now.  Skip just this attachment rather than
      // returning, so that one unsupported (or missing) entry does not
      // silently drop every attachment that happens to come after it.
      continue;
    }
    let ext = filename_extension(name);
    if (ext === "jpg") {
      ext = "jpeg";
    }
    obj.attachments[name] = { [`image/${ext}`]: val.value };
  }
}

// Convert the output map of one code cell to the ipynb list of outputs.
// The output map and its messages are modified in place.
function ipynbOutputs({
  output,
  exec_count,
  more_output,
  blob_store,
}: {
  output: { [n: string]: OutputMessage };
  exec_count: number;
  more_output?: OutputMessage[];
  blob_store?: BlobStore;
}) {
  // If the last message has the more_output field, then there may be
  // more output messages stored, which are not in the cells object.
  let len = objArrayLength(output);
  // When the output map is empty, len is 0 and there is no last message,
  // hence the optional chaining (a plain property access would throw).
  const last = output[`${len - 1}`];
  if (last?.more_output != null) {
    let n: number = len - 1;
    const cnt = more_output?.length ?? 0;
    if (cnt === 0 || more_output == null) {
      // For some reason more output is not available for this cell. So we replace
      // the more_output message by an error explaining what happened.
      output[`${n}`] = {
        text: "WARNING: Some output was deleted.\n",
        name: "stderr",
      };
    } else {
      // Indeed, the last message has the more_output field.
      // Before converting to ipynb, we remove that last message...
      delete output[`${n}`];
      // Then we put in the known more output.
      for (const mesg of more_output) {
        output[`${n}`] = mesg;
        n += 1;
      }
    }
  }
  // Now, everything continues as normal.

  const outputs: OutputMessage[] = [];
  len = objArrayLength(output);
  if (output != null && len > 0) {
    for (let n = 0; n < len; n++) {
      const output_n = output?.[`${n}`];
      if (output_n != null) {
        processOutputN(output_n, exec_count, blob_store);
        outputs.push(output_n);
      }
    }
  }
  return outputs;
}

// Length of an "array" stored as an object with keys "0", "1", "2", ...:
// one more than the largest integer key, or 0 if the object is null/undefined
// or has no integer keys.
function objArrayLength(objArray) {
  if (objArray == null) {
    return 0;
  }
  let n = -1;
  for (const k in objArray) {
    const j = parseInt(k);
    // keys that don't parse give NaN, and NaN > n is false, so they are ignored
    if (j > n) {
      n = j;
    }
  }
  return n + 1;
}

// Convert a single output message to ipynb form, IN PLACE.
//   - messages with data become "execute_result"; text/* values are split
//     into lines, and image/*, application/pdf and iframe values are looked
//     up in the blob store (an iframe becomes text/html);
//   - messages with a name become "stream" (with stdin input converted
//     via processStdinOutput);
//   - messages with an ename become "error".
// NOTE: when a blob can't be resolved (no blob_store, or the store doesn't
// have it), this returns immediately, so output_type is not set on the message.
function processOutputN(
  output_n: OutputMessage,
  exec_count: number,
  blob_store?: BlobStore,
) {
  if (output_n == null) {
    return;
  }
  if (output_n.exec_count != null) {
    delete output_n.exec_count;
  }
  if (output_n.text != null) {
    output_n.text = diff_friendly(output_n.text);
  }
  if (output_n.data != null) {
    for (let k in output_n.data) {
      const v = output_n.data[k];
      if (k.slice(0, 5) === "text/") {
        output_n.data[k] = diff_friendly(output_n.data[k]);
      }
      if (k.startsWith("image/") || k === "application/pdf" || k === "iframe") {
        if (blob_store != null) {
          let value;
          if (k === "iframe") {
            // the iframe content is written out under the text/html key instead
            delete output_n.data[k];
            k = "text/html";
            value = blob_store.getString(v);
          } else {
            value = blob_store.getBase64(v);
          }
          if (value == null) {
            // The image is no longer known; this could happen if the user reverts in the history
            // browser and there is an image in the output that was not saved in the latest version.
            // TODO: instead return an error.
            return;
          }
          output_n.data[k] = value;
        } else {
          return; // impossible to include in the output without blob_store
        }
      }
    }
    output_n.output_type = "execute_result";
    if (output_n.metadata == null) {
      output_n.metadata = {};
    }
    output_n.execution_count = exec_count;
  } else if (output_n.name != null) {
    output_n.output_type = "stream";
    if (output_n.name === "input") {
      processStdinOutput(output_n);
    }
  } else if (output_n.ename != null) {
    output_n.output_type = "error";
  }
}

// Rewrite a stdin ("input") message in place as a stdout stream message whose
// text is the prompt followed by the entered value.  A deep copy of the
// original message is kept in output.cocalc; cell_to_ipynb moves that copy
// into the cell-level metadata.
function processStdinOutput(output) {
  output.cocalc = deep_copy(output);
  output.name = "stdout";
  output.text = output.opts.prompt + " " + (output.value ?? "");
  delete output.opts;
  delete output.value;
}

// Transform a string s with newlines into an array v of strings
// such that v.join('') == s.
// A non-string input is returned unchanged, except that null becomes undefined.
function diff_friendly(
  s: string | string[] | undefined | null,
): string[] | undefined | null {
  if (typeof s !== "string") {
    // might already be an array or undefined.
    if (s == null) {
      return undefined;
    }
    return s;
  }
  const v = s.split("\n");
  for (let i = 0; i < v.length - 1; i++) {
    v[i] += "\n";
  }
  if (v[v.length - 1] === "") {
    v.pop(); // remove last elt
  }
  return v;
}