Path: blob/master/src/packages/util/db-schema/projects.ts
1447 views
/*1* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.2* License: MS-RSL – see LICENSE.md for details3*/45import { State } from "@cocalc/util/compute-states";6import { PurchaseInfo } from "@cocalc/util/licenses/purchase/types";7import { deep_copy } from "@cocalc/util/misc";8import {9ExecuteCodeOptions,10ExecuteCodeOptionsAsyncGet,11ExecuteCodeOutput,12} from "@cocalc/util/types/execute-code";13import { DEFAULT_QUOTAS } from "@cocalc/util/upgrade-spec";1415import { NOTES } from "./crm";16import { FALLBACK_COMPUTE_IMAGE } from "./defaults";17import { SCHEMA as schema } from "./index";18import { Table } from "./types";1920export const MAX_FILENAME_SEARCH_RESULTS = 100;2122Table({23name: "projects",24rules: {25primary_key: "project_id",26//# A lot depends on this being right at all times, e.g., restart state,27//# so do not use db_standby yet.28//# It is simply not robust enough.29//# db_standby : 'safer'3031pg_indexes: [32"last_edited",33"created", // TODO: this could have a fillfactor of 10034"USING GIN (users)", // so get_collaborator_ids is fast35"lti_id",36"USING GIN (state)", // so getting all running projects is fast (e.g. for site_license_usage_log... but also manage-state)37"((state #>> '{state}'))", // projecting the "state" (running, etc.) for its own index – the GIN index above still causes a scan, which we want to avoid.38"((state ->> 'state'))", // same reason as above. both syntaxes appear and we have to index both.39"((state IS NULL))", // not covered by the above40"((settings ->> 'always_running'))", // to quickly know which projects have this setting41"((run_quota ->> 'always_running'))", // same reason as above42"deleted", // in various queries we quickly fiter deleted projects43"site_license", // for queries across projects related to site_license#>>{license_id}44],4546crm_indexes: ["last_edited"],4748user_query: {49get: {50pg_where: ["last_edited >= NOW() - interval '21 days'", "projects"],51pg_where_load: ["last_edited >= NOW() - interval '2 days'", "projects"],52options: [{ limit: 100, order_by: "-last_edited" }],53options_load: [{ limit: 15, order_by: "-last_edited" }],54pg_changefeed: "projects",55throttle_changes: 2000,56fields: {57project_id: null,58name: null,59title: "",60description: "",61users: {},62invite: null, // who has been invited to this project via email63invite_requests: null, // who has requested to be invited64deleted: null,65host: null,66settings: DEFAULT_QUOTAS,67run_quota: null,68site_license: null,69status: null,70// security model is anybody with access to the project should be allowed to know this token.71secret_token: null,72state: null,73last_edited: null,74last_active: null,75action_request: null, // last requested action -- {action:?, time:?, started:?, finished:?, err:?}76course: null,77// if the value is not set, we have to use the old default prior to summer 2020 (Ubuntu 18.04, not 20.04!)78compute_image: FALLBACK_COMPUTE_IMAGE,79created: null,80env: null,81sandbox: null,82avatar_image_tiny: null,83// do NOT add avatar_image_full here or it will get included in changefeeds, which we don't want.84// instead it gets its own virtual table.85pay_as_you_go_quotas: null,86},87},88set: {89// NOTE: for security reasons users CANNOT set the course field via a user query;90// instead use the api/v2/projects/course/set-course-field api endpoint.91fields: {92project_id: "project_write",93title: true,94name: true,95description: true,96deleted: true,97invite_requests: true, // project collabs can modify this (e.g., to remove from it once user added or rejected)98users(obj, db, account_id) {99return db._user_set_query_project_users(obj, account_id);100},101action_request: true, // used to request that an action be performed, e.g., "save"; handled by before_change102compute_image: true,103site_license: true,104env: true,105sandbox: true,106avatar_image_tiny: true,107avatar_image_full: true,108},109required_fields: {110project_id: true,111},112before_change(database, old_val, new_val, account_id, cb) {113database._user_set_query_project_change_before(114old_val,115new_val,116account_id,117cb,118);119},120121on_change(database, old_val, new_val, account_id, cb) {122database._user_set_query_project_change_after(123old_val,124new_val,125account_id,126cb,127);128},129},130},131132project_query: {133get: {134pg_where: [{ "project_id = $::UUID": "project_id" }],135fields: {136project_id: null,137title: null,138description: null,139status: null,140},141},142set: {143fields: {144project_id: "project_id",145title: true,146description: true,147status: true,148},149},150},151},152fields: {153project_id: {154type: "uuid",155desc: "The project id, which is the primary key that determines the project.",156},157name: {158type: "string",159pg_type: "VARCHAR(100)",160desc: "The optional name of this project. Must be globally unique (up to case) across all projects with a given *owner*. It can be between 1 and 100 characters from a-z A-Z 0-9 period and dash.",161render: { type: "text", maxLen: 100, editable: true },162},163title: {164type: "string",165desc: "The short title of the project. Should use no special formatting, except hashtags.",166render: { type: "project_link", project_id: "project_id" },167},168description: {169type: "string",170desc: "A longer textual description of the project. This can include hashtags and should be formatted using markdown.",171render: {172type: "markdown",173maxLen: 1024,174editable: true,175},176}, // markdown rendering possibly not implemented177users: {178title: "Collaborators",179type: "map",180desc: "This is a map from account_id's to {hide:bool, group:'owner'|'collaborator', upgrades:{memory:1000, ...}, ssh:{...}}.",181render: { type: "usersmap", editable: true },182},183invite: {184type: "map",185desc: "Map from email addresses to {time:when invite sent, error:error message if there was one}",186date: ["time"],187},188invite_requests: {189type: "map",190desc: "This is a map from account_id's to {timestamp:?, message:'i want to join because...'}.",191date: ["timestamp"],192},193deleted: {194type: "boolean",195desc: "Whether or not this project is deleted.",196render: { type: "boolean", editable: true },197},198host: {199type: "map",200desc: "This is a map {host:'hostname_of_server', assigned:timestamp of when assigned to that server}.",201date: ["assigned"],202},203settings: {204type: "map",205desc: 'This is a map that defines the free base quotas that a project has. It is of the form {cores: 1.5, cpu_shares: 768, disk_quota: 1000, memory: 2000, mintime: 36000000, network: 0, ephemeral_state:0, ephemeral_disk:0, always_running:0}. WARNING: some of the values are strings not numbers in the database right now, e.g., disk_quota:"1000".',206},207site_license: {208type: "map",209desc: "This is a map that defines upgrades (just when running the project) that come from a site license, and also the licenses that are applied to this project. The format is {license_id:{memory:?, mintime:?, ...}} where the target of the license_id is the same as for the settings field. The license_id is the uuid of the license that contributed these upgrades. To tell cocalc to use a license for a project, a user sets site_license to {license_id:{}}, and when it is requested to start the project, the backend decides what allocation license_id provides and changes the field accordingly, i.e., changes {license_id:{},...} to {license_id:{memory:?,...},...}",210},211status: {212type: "map",213desc: "This is a map computed by the status command run inside a project, and slightly enhanced by the compute server, which gives extensive status information about a project. See the exported ProjectStatus interface defined in the code here.",214},215state: {216type: "map",217desc: 'Info about the state of this project of the form {error: "", state: "running" (etc), time: timestamp, ip?:"ip address where project is"}, where time is when the state was last computed. See COMPUTE_STATES in the compute-states file for state.state and the ProjectState interface defined below in code.',218date: ["time"],219},220last_edited: {221type: "timestamp",222desc: "The last time some file was edited in this project. This is the last time that the file_use table was updated for this project.",223},224last_started: {225type: "timestamp",226desc: "The last time the project started running.",227},228last_active: {229type: "map",230desc: "Map from account_id's to the timestamp of when the user with that account_id touched this project.",231date: "all",232},233created: {234type: "timestamp",235desc: "When the project was created.",236},237action_request: {238type: "map",239desc: "Request state change action for project: {action:['start', 'stop'], started:timestamp, err:?, finished:timestamp}",240date: ["started", "finished"],241},242storage: {243type: "map",244desc: "(DEPRECATED) This is a map {host:'hostname_of_server', assigned:when first saved here, saved:when last saved here}.",245date: ["assigned", "saved"],246},247last_backup: {248type: "timestamp",249desc: "(DEPRECATED) Timestamp of last off-disk successful backup using bup to Google cloud storage",250},251storage_request: {252type: "map",253desc: "(DEPRECATED) {action:['save', 'close', 'move', 'open'], requested:timestap, pid:?, target:?, started:timestamp, finished:timestamp, err:?}",254date: ["started", "finished", "requested"],255},256course: {257type: "map",258desc: "{project_id:[id of project that contains .course file], path:[path to .course file], pay:?, payInfo:?, email_address:[optional email address of student -- used if account_id not known], account_id:[account id of student]}, where pay is either not set (or equals falseish) or is a timestamp by which the students must pay. If payInfo is set, it specifies the parameters of the license the students should purchase.",259date: ["pay"],260},261storage_server: {262type: "integer",263desc: "(DEPRECATED) Number of the Kubernetes storage server with the data for this project: one of 0, 1, 2, ...",264},265storage_ready: {266type: "boolean",267desc: "(DEPRECATED) Whether storage is ready to be used on the storage server. Do NOT try to start project until true; this gets set by storage daemon when it notices that run is true.",268},269disk_size: {270type: "integer",271desc: "Size in megabytes of the project disk.",272},273resources: {274type: "map",275desc: 'Object of the form {requests:{memory:"30Mi",cpu:"5m"}, limits:{memory:"100Mi",cpu:"300m"}} which is passed to the k8s resources section for this pod.',276},277preemptible: {278type: "boolean",279desc: "If true, allow to run on preemptible nodes.",280},281idle_timeout: {282type: "integer",283desc: "If given and nonzero, project will be killed if it is idle for this many **minutes**, where idle *means* that last_edited has not been updated.",284},285run_quota: {286type: "map",287desc: "If project is running, this is the quota that it is running with.",288},289compute_image: {290type: "string",291desc: "Specify the name of the underlying (kucalc) compute image.",292},293addons: {294type: "map",295desc: "Configure (kucalc specific) addons for projects. (e.g. academic software, license keys, ...)",296},297lti_id: {298type: "array",299pg_type: "TEXT[]",300desc: "This is a specific ID derived from an LTI context",301},302lti_data: {303type: "map",304desc: "extra information related to LTI",305},306env: {307type: "map",308desc: "Additional environment variables (TS: {[key:string]:string})",309render: { type: "json", editable: true },310},311sandbox: {312type: "boolean",313desc: "If set to true, then any user who attempts to access this project is automatically added as a collaborator to it. Only the project owner can change this setting.",314render: { type: "boolean", editable: true },315},316avatar_image_tiny: {317title: "Image",318type: "string",319desc: "tiny (32x32) visual image associated with the project. Suitable to include as part of changefeed, since about 3kb.",320render: { type: "image" },321},322avatar_image_full: {323title: "Image",324type: "string",325desc: "A visual image associated with the project. Could be 150kb. NOT include as part of changefeed of projects, since potentially big (e.g., 200kb x 1000 projects = 200MB!).",326render: { type: "image" },327},328pay_as_you_go_quotas: {329type: "map",330desc: "Pay as you go quotas that users set so that when they run this project, it gets upgraded to at least what is specified here, and user gets billed later for what is used. Any changes to this table could result in money being spent, so should only be done via the api. This is a map from the account_id of the user that set the quota to the value of the quota spec (which is purchase-quotas.ProjectQuota).",331render: { type: "json", editable: false },332},333notes: NOTES,334secret_token: {335type: "string",336pg_type: "VARCHAR(256)",337desc: "Random ephemeral secret token used temporarily by project to authenticate with hub.",338},339},340});341342export interface ApiKeyInfo {343name: string;344trunc: string;345hash?: string;346used?: number;347}348349// Same query above, but without the last_edited time constraint.350schema.projects_all = deep_copy(schema.projects);351if (352schema.projects_all.user_query?.get == null ||353schema.projects.user_query?.get == null354) {355throw Error("make typescript happy");356}357schema.projects_all.user_query.get.options = [];358schema.projects_all.user_query.get.options_load = [];359schema.projects_all.virtual = "projects";360schema.projects_all.user_query.get.pg_where = ["projects"];361schema.projects_all.user_query.get.pg_where_load = ["projects"];362363// Table that provides extended read info about a single project364// but *ONLY* for admin.365Table({366name: "projects_admin",367fields: schema.projects.fields,368rules: {369primary_key: schema.projects.primary_key,370virtual: "projects",371user_query: {372get: {373admin: true, // only admins can do get queries on this table374// (without this, users who have read access could read)375pg_where: [{ "project_id = $::UUID": "project_id" }],376fields: schema.projects.user_query.get.fields,377},378},379},380});381382/*383Table that enables set queries to the course field of a project. Only384project owners are allowed to use this table. The point is that this makes385it possible for the owner of the project to set things, but not for the386collaborators to set those things.387**wARNING:** right now we're not using this since when multiple people add388students to a course and the 'course' field doesn't get properly set,389much confusion and misery arises.... and it is very hard to fix.390In theory a malicous student could not pay via this. But if they could391mess with their client, they could easily not pay anyways.392*/393Table({394name: "projects_owner",395rules: {396virtual: "projects",397user_query: {398set: {399fields: {400project_id: "project_owner",401course: true,402},403},404},405},406fields: {407project_id: true,408course: true,409},410});411412/*413414Table that enables any signed-in user to set an invite request.415Later: we can make an index so that users can see all outstanding requests they have made easily.416How to test this from the browser console:417project_id = '4e0f5bfd-3f1b-4d7b-9dff-456dcf8725b8' // id of a project you have418invite_requests = {}; invite_requests[smc.client.account_id] = {timestamp:new Date(), message:'please invite me'}419smc.client.query({cb:console.log, query:{project_invite_requests:{project_id:project_id, invite_requests:invite_requests}}}) // set it420smc.redux.getStore('projects').get_project(project_id).invite_requests // see requests for this project421422CURRENTLY NOT USED, but probably will be...423424database._user_set_query_project_invite_requests(old_val, new_val, account_id, cb)425For now don't check anything -- this is how we will make it secure later.426This will:427- that user setting this is signed in428- ensure user only modifies their own entry (for their own id).429- enforce some hard limit on number of outstanding invites (say 30).430- enforce limit on size of invite message.431- sanity check on timestamp432- with an index as mentioned above we could limit the number of projects433to which a single user has requested to be invited.434435*/436Table({437name: "project_invite_requests",438rules: {439virtual: "projects",440primary_key: "project_id",441user_query: {442set: {443fields: {444project_id: true,445invite_requests: true,446},447before_change(_database, _old_val, _new_val, _account_id, cb) {448cb();449},450},451},452}, // actual function will be database._user...453fields: {454project_id: true,455invite_requests: true,456}, // {account_id:{timestamp:?, message:?}, ...}457});458459/*460Virtual table to get project avatar_images.461We don't put this in the main projects table,462since we don't want the avatar_image_full to be463the projects queries or changefeeds, since it464is big, and by default all get fields appear there.465*/466467Table({468name: "project_avatar_images",469rules: {470virtual: "projects",471primary_key: "project_id",472user_query: {473get: {474pg_where: ["projects"],475fields: {476project_id: null,477avatar_image_full: null,478},479},480},481},482fields: {483project_id: true,484avatar_image_full: true,485},486});487488/*489Table to get/set the datastore config in addons.490491The main idea is to set/update/delete entries in the dict addons.datastore.[key] = {...}492*/493Table({494name: "project_datastore",495rules: {496virtual: "projects",497primary_key: "project_id",498user_query: {499set: {500// this also deals with delete requests501fields: {502project_id: true,503addons: true,504},505async instead_of_change(506db,507_old_value,508new_val,509account_id,510cb,511): Promise<void> {512try {513// to delete an entry, pretend to set the datastore = {delete: [name]}514if (typeof new_val.addons.datastore.delete === "string") {515await db.project_datastore_del(516account_id,517new_val.project_id,518new_val.addons.datastore.delete,519);520cb(undefined);521} else {522// query should set addons.datastore.[new key] = config, such that we see here523// new_val = {"project_id":"...","addons":{"datastore":{"key3":{"type":"xxx", ...}}}}524// which will be merged into the existing addons.datastore dict525const res = await db.project_datastore_set(526account_id,527new_val.project_id,528new_val.addons.datastore,529);530cb(undefined, res);531}532} catch (err) {533cb(`${err}`);534}535},536},537get: {538fields: {539project_id: true,540addons: true,541},542async instead_of_query(db, opts, cb): Promise<void> {543if (opts.multi) {544throw Error("'multi' is not implemented");545}546try {547// important: the config dicts for each key must not expose secret credentials!548// check if opts.query.addons === null ?!549const data = await db.project_datastore_get(550opts.account_id,551opts.query.project_id,552);553cb(undefined, data);554} catch (err) {555cb(`${err}`);556}557},558},559},560},561fields: {562project_id: true,563addons: true,564},565});566567export interface ProjectStatus {568"project.pid"?: number; // pid of project server process569"hub-server.port"?: number; // port of tcp server that is listening for conn from hub570"browser-server.port"?: number; // port listening for http/websocket conn from browser client571"sage_server.port"?: number; // port where sage server is listening.572"sage_server.pid"?: number; // pid of sage server process573start_ts?: number; // timestamp, when project server started574session_id?: string; // unique identifyer575version?: number; // version number of project code576disk_MB?: number; // MB of used disk577installed?: boolean; // whether code is installed578memory?: {579count?: number;580pss?: number;581rss?: number;582swap?: number;583uss?: number;584}; // output by smem585}586587export interface ProjectState {588ip?: string; // where the project is running589error?: string;590state?: State; // running, stopped, etc.591time?: Date;592}593594Table({595name: "crm_projects",596fields: schema.projects.fields,597rules: {598primary_key: schema.projects.primary_key,599virtual: "projects",600user_query: {601get: {602admin: true, // only admins can do get queries on this table603// (without this, users who have read access could read)604pg_where: [],605fields: {606...schema.projects.user_query?.get?.fields,607notes: null,608},609},610set: {611admin: true,612fields: {613project_id: true,614name: true,615title: true,616description: true,617deleted: true,618notes: true,619},620},621},622},623});624625export type Datastore = boolean | string[] | undefined;626627// in the future, we might want to extend this to include custom environmment variables628export interface EnvVarsRecord {629inherit?: boolean;630}631export type EnvVars = EnvVarsRecord | undefined;632633export interface StudentProjectFunctionality {634disableActions?: boolean;635disableJupyterToggleReadonly?: boolean;636disableJupyterClassicServer?: boolean;637disableJupyterClassicMode?: boolean;638disableJupyterLabServer?: boolean;639disableRServer?: boolean;640disableVSCodeServer?: boolean;641disableLibrary?: boolean;642disableNetworkWarningBanner?: boolean;643disablePlutoServer?: boolean;644disableTerminals?: boolean;645disableUploads?: boolean;646disableNetwork?: boolean;647disableSSH?: boolean;648disableCollaborators?: boolean;649disableChatGPT?: boolean;650disableSharing?: boolean;651}652653export interface CourseInfo {654type: "student" | "shared" | "nbgrader";655account_id?: string; // account_id of the student that this project is for.656project_id: string; // the course project, i.e., project with the .course file657path: string; // path to the .course file in project_id658pay?: string; // iso timestamp or ""659paid?: string; // iso timestamp with *when* they paid.660purchase_id?: number; // id of purchase record in purchases table.661payInfo?: PurchaseInfo;662email_address?: string;663datastore: Datastore;664student_project_functionality?: StudentProjectFunctionality;665envvars?: EnvVars;666}667668type ExecOptsCommon = {669project_id: string;670cb?: Function; // if given use a callback interface *instead* of async.671};672673export type ExecOptsBlocking = ExecOptsCommon & {674compute_server_id?: number; // if true, run on the compute server (if available)675filesystem?: boolean; // run in fileserver container on compute server; otherwise, runs on main compute container.676path?: string;677command: string;678args?: string[];679timeout?: number;680max_output?: number;681bash?: boolean;682aggregate?: string | number | { value: string | number };683err_on_exit?: boolean;684env?: { [key: string]: string }; // custom environment variables.685async_call?: ExecuteCodeOptions["async_call"];686};687688export type ExecOptsAsync = ExecOptsCommon & {689async_get?: ExecuteCodeOptionsAsyncGet["async_get"];690async_stats?: ExecuteCodeOptionsAsyncGet["async_stats"];691async_await?: ExecuteCodeOptionsAsyncGet["async_await"];692};693694export type ExecOpts = ExecOptsBlocking | ExecOptsAsync;695696export function isExecOptsBlocking(opts: unknown): opts is ExecOptsBlocking {697return (698typeof opts === "object" &&699typeof (opts as any).project_id === "string" &&700typeof (opts as any).command === "string"701);702}703704export type ExecOutput = ExecuteCodeOutput & {705time: number; // time in ms, from user point of view.706};707708export interface CreateProjectOptions {709account_id?: string;710title?: string;711description?: string;712// (optional) image ID713image?: string;714// (optional) license id (or multiple ids separated by commas) -- if given, project will be created with this license715license?: string;716public_path_id?: string; // may imply use of a license717// noPool = do not allow using the pool (e.g., need this when creating projects to put in the pool);718// not a real issue since when creating for pool account_id is null, and then we wouldn't use the pool...719noPool?: boolean;720// start running the moment the project is created -- uses more resources, but possibly better user experience721start?: boolean;722723// admins can specify the project_id - nobody else can -- useful for debugging.724project_id?: string;725}726727interface BaseCopyOptions {728target_project_id?: string;729target_path?: string; // path into project; if not given, defaults to source path above.730overwrite_newer?: boolean; // if true, newer files in target are copied over (otherwise, uses rsync's --update)731delete_missing?: boolean; // if true, delete files in dest path not in source, **including** newer files732backup?: boolean; // make backup files733timeout?: number; // in **seconds**, not milliseconds734bwlimit?: number;735wait_until_done?: boolean; // by default, wait until done. false only gives the ID to query the status later736scheduled?: string | Date; // kucalc only: string (parseable by new Date()), or a Date737public?: boolean; // kucalc only: if true, may use the share server files rather than start the source project running738exclude?: string[]; // options passed to rsync via --exclude739}740export interface UserCopyOptions extends BaseCopyOptions {741account_id?: string;742src_project_id: string;743src_path: string;744// simulate copy taking at least this long -- useful for dev/debugging.745debug_delay_ms?: number;746}747748// for copying files within and between projects749export interface CopyOptions extends BaseCopyOptions {750path: string;751}752753754