Path: blob/master/src/packages/database/postgres-user-queries.coffee
1496 views
#########################################################################1# This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.2# License: MS-RSL – see LICENSE.md for details3#########################################################################45"""6User (and project) client queries78COPYRIGHT : (c) 2017 SageMath, Inc.9LICENSE : MS-RSL10"""1112MAX_CHANGEFEEDS_PER_CLIENT = 20001314# Reject all patches that have timestamp that is more than 3 minutes in the future.15MAX_PATCH_FUTURE_MS = 1000*60*31617EventEmitter = require('events')18async = require('async')19lodash = require('lodash')2021{one_result, all_results, count_result, pg_type, quote_field} = require('./postgres-base')2223{UserQueryQueue} = require('./postgres-user-query-queue')2425{defaults} = misc = require('@cocalc/util/misc')26required = defaults.required2728{PROJECT_UPGRADES, SCHEMA, OPERATORS, isToOperand} = require('@cocalc/util/schema')29{queryIsCmp, userGetQueryFilter} = require("./user-query/user-get-query")3031{updateRetentionData} = require('./postgres/retention')3233{ checkProjectName } = require("@cocalc/util/db-schema/name-rules");34{callback2} = require('@cocalc/util/async-utils')353637exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext38# Cancel all queued up queries by the given client39cancel_user_queries: (opts) =>40opts = defaults opts,41client_id : required42@_user_query_queue?.cancel_user_queries(opts)4344user_query: (opts) =>45opts = defaults opts,46client_id : undefined # if given, uses to control number of queries at once by one client.47priority : undefined # (NOT IMPLEMENTED) priority for this query (an integer [-10,...,19] like in UNIX)48account_id : undefined49project_id : undefined50query : required51options : []52changes : undefined53cb : undefined5455if opts.account_id?56# Check for "sudo" by admin to query as a different user, which is done by specifying57# options = [..., {account_id:'uuid'}, ...].58for x in opts.options59if x.account_id?60# Check user is an admin, then change opts.account_id61@get_account62columns : ['groups']63account_id : opts.account_id64cb : (err, r) =>65if err66opts.cb?(err)67else if r['groups']? and 'admin' in r['groups']68opts.account_id = x.account_id69opts.options = (y for y in opts.options when not y['account_id']?)70# now do query with new opts and options not including account_id sudo.71@user_query(opts)72else73opts.cb?('user must be admin to sudo')74return7576if not opts.client_id?77# No client_id given, so do not use query queue.78delete opts.priority79delete opts.client_id80@_user_query(opts)81return8283if not @_user_query_queue?84o =85do_query : @_user_query86dbg : @_dbg('user_query_queue')87concurrent : @concurrent88@_user_query_queue ?= new UserQueryQueue(o)8990@_user_query_queue.user_query(opts)9192_user_query: (opts) =>93opts = defaults opts,94account_id : undefined95project_id : undefined96query : required97options : [] # used for initial query; **IGNORED** by changefeed!;98# - Use [{set:true}] or [{set:false}] to force get or set query99# - For a set query, use {delete:true} to delete instead of set. This is the only way100# to delete a record, and won't work unless delete:true is set in the schema101# for the table to explicitly allow deleting.102changes : undefined # id of change feed103cb : undefined # cb(err, result) # WARNING -- this *will* get called multiple times when changes is true!104id = misc.uuid().slice(0,6)105dbg = @_dbg("_user_query(id=#{id})")106dbg(misc.to_json(opts.query))107if misc.is_array(opts.query)108dbg('array query instead')109@_user_query_array(opts)110return111112subs =113'{account_id}' : opts.account_id114'{project_id}' : opts.project_id115'{now}' : new Date()116117if opts.changes?118changes =119id : opts.changes120cb : opts.cb121122v = misc.keys(opts.query)123if v.length > 1124dbg("FATAL no key")125opts.cb?('FATAL: must specify exactly one key in the query')126return127table = v[0]128query = opts.query[table]129if misc.is_array(query)130if query.length > 1131dbg("FATAL not implemented")132opts.cb?("FATAL: array of length > 1 not yet implemented")133return134multi = true135query = query[0]136else137multi = false138is_set_query = undefined139if opts.options?140if not misc.is_array(opts.options)141dbg("FATAL options")142opts.cb?("FATAL: options (=#{misc.to_json(opts.options)}) must be an array")143return144for x in opts.options145if x.set?146is_set_query = !!x.set147options = (x for x in opts.options when not x.set?)148else149options = []150151if misc.is_object(query)152query = misc.deep_copy(query)153misc.obj_key_subs(query, subs)154if not is_set_query?155is_set_query = not misc.has_null_leaf(query)156if is_set_query157dbg("do a set query")158if changes159dbg("FATAL: changefeed")160opts.cb?("FATAL: changefeeds only for read queries")161return162if not opts.account_id? and not opts.project_id?163dbg("FATAL: anon set")164opts.cb?("FATAL: no anonymous set queries")165return166dbg("user_set_query")167@user_set_query168account_id : opts.account_id169project_id : opts.project_id170table : table171query : query172options : opts.options173cb : (err, x) =>174dbg("returned #{err}")175opts.cb?(err, {"#{table}":x})176else177# do a get query178if changes and not multi179dbg("FATAL: changefeed multi")180opts.cb?("FATAL: changefeeds only implemented for multi-document queries")181return182183if changes184err = @_inc_changefeed_count(opts.account_id, opts.project_id, table, changes.id)185if err186dbg("err changefeed count -- #{err}")187opts.cb?(err)188return189190dbg("user_get_query")191@user_get_query192account_id : opts.account_id193project_id : opts.project_id194table : table195query : query196options : options197multi : multi198changes : changes199cb : (err, x) =>200dbg("returned #{err}")201if err and changes202# didn't actually make the changefeed, so don't count it.203@_dec_changefeed_count(changes.id, table)204opts.cb?(err, if not err then {"#{table}":x})205else206dbg("FATAL - invalid table")207opts.cb?("FATAL: invalid user_query of '#{table}' -- query must be an object")208209###210TRACK CHANGEFEED COUNTS211212_inc and dec below are evidently broken, in that it's CRITICAL that they match up exactly, or users will be213locked out until they just happen to switch to another hub with different tracking, which is silly.214215TODO: DISABLED FOR NOW!216###217218# Increment a count of the number of changefeeds by a given client so we can cap it.219_inc_changefeed_count: (account_id, project_id, table, changefeed_id) =>220return221client_name = "#{account_id}-#{project_id}"222cnt = @_user_get_changefeed_counts ?= {}223ids = @_user_get_changefeed_id_to_user ?= {}224if not cnt[client_name]?225cnt[client_name] = 1226else if cnt[client_name] >= MAX_CHANGEFEEDS_PER_CLIENT227return "user may create at most #{MAX_CHANGEFEEDS_PER_CLIENT} changefeeds; please close files, refresh browser, restart project"228else229# increment before successfully making get_query to prevent huge bursts causing trouble!230cnt[client_name] += 1231@_dbg("_inc_changefeed_count(table='#{table}')")("{#{client_name}:#{cnt[client_name]} ...}")232ids[changefeed_id] = client_name233return false234235# Corresponding decrement of count of the number of changefeeds by a given client.236_dec_changefeed_count: (id, table) =>237return238client_name = @_user_get_changefeed_id_to_user[id]239if client_name?240@_user_get_changefeed_counts?[client_name] -= 1241delete @_user_get_changefeed_id_to_user[id]242cnt = @_user_get_changefeed_counts243if table?244t = "(table='#{table}')"245else246t = ""247@_dbg("_dec_changefeed_count#{t}")("counts={#{client_name}:#{cnt[client_name]} ...}")248249# Handle user_query when opts.query is an array. opts below are as for user_query.250_user_query_array: (opts) =>251if opts.changes and opts.query.length > 1252opts.cb?("FATAL: changefeeds only implemented for single table")253return254result = []255f = (query, cb) =>256@user_query257account_id : opts.account_id258project_id : opts.project_id259query : query260options : opts.options261cb : (err, x) =>262result.push(x); cb(err)263async.mapSeries(opts.query, f, (err) => opts.cb(err, result))264265user_query_cancel_changefeed: (opts) =>266opts = defaults opts,267id : required268cb : undefined # not really asynchronous269dbg = @_dbg("user_query_cancel_changefeed(id='#{opts.id}')")270feed = @_changefeeds?[opts.id]271if feed?272dbg("actually canceling feed")273@_dec_changefeed_count(opts.id)274delete @_changefeeds[opts.id]275feed.close()276else277dbg("already canceled before (no such feed)")278opts.cb?()279280_user_get_query_columns: (query, remove_from_query) =>281v = misc.keys(query)282if remove_from_query?283# If remove_from_query is specified it should be an array of strings284# and we do not includes these in what is returned.285v = lodash.difference(v, remove_from_query)286return v287288_require_is_admin: (account_id, cb) =>289if not account_id?290cb("FATAL: user must be an admin")291return292@is_admin293account_id : account_id294cb : (err, is_admin) =>295if err296cb(err)297else if not is_admin298cb("FATAL: user must be an admin")299else300cb()301302# Ensure that each project_id in project_ids is such that the account is in one of the given303# groups for the project, or that the account is an admin. If not, cb(err).304_require_project_ids_in_groups: (account_id, project_ids, groups, cb) =>305s = {"#{account_id}": true}306require_admin = false307@_query308query : "SELECT project_id, users#>'{#{account_id}}' AS user FROM projects"309where : "project_id = ANY($)":project_ids310cache : true311cb : all_results (err, x) =>312if err313cb(err)314else315known_project_ids = {} # we use this to ensure that each of the given project_ids exists.316for p in x317known_project_ids[p.project_id] = true318if p.user?.group not in groups319require_admin = true320# If any of the project_ids don't exist, reject the query.321for project_id in project_ids322if not known_project_ids[project_id]323cb("FATAL: unknown project_id '#{misc.trunc(project_id,100)}'")324return325if require_admin326@_require_is_admin(account_id, cb)327else328cb()329330_query_parse_options: (options) =>331r = {}332for x in options333for name, value of x334switch name335when 'only_changes'336r.only_changes = !!value337when 'limit'338r.limit = parseInt(value)339when 'slice'340r.slice = value341when 'order_by'342if value[0] == '-'343value = value.slice(1) + " DESC "344if r.order_by345r.order_by = r.order_by + ', ' + value346else347r.order_by = value348when 'delete'349null350# ignore delete here - is parsed elsewhere351when 'heartbeat'352@_dbg("_query_parse_options")("TODO/WARNING -- ignoring heartbeat option from old client")353else354r.err = "unknown option '#{name}'"355# Guard rails: no matter what, all queries are capped with a limit of 100000.356# TODO: If somehow somebody has, e.g., more than 100K projects, or maybe more357# than 100K edits of a single file, they could hit this and not realize it. I358# had this set at 1000 for a few minutes and it caused me to randomly not have359# some of my projects.360MAX_LIMIT = 100000361try362if not isFinite(r.limit)363r.limit = MAX_LIMIT364else if r.limit > MAX_LIMIT365r.limit = MAX_LIMIT366catch367r.limit = MAX_LIMIT368return r369370###371SET QUERIES372###373_parse_set_query_opts: (opts) =>374r = {}375376if opts.project_id?377dbg = r.dbg = @_dbg("user_set_query(project_id='#{opts.project_id}', table='#{opts.table}')")378else if opts.account_id?379dbg = r.dbg = @_dbg("user_set_query(account_id='#{opts.account_id}', table='#{opts.table}')")380else381return {err:"FATAL: account_id or project_id must be specified to set query on table='#{opts.table}'"}382383if not SCHEMA[opts.table]?384return {err:"FATAL: table '#{opts.table}' does not exist"}385386dbg(misc.to_json(opts.query))387388if opts.options389dbg("options=#{misc.to_json(opts.options)}")390391r.query = misc.copy(opts.query)392r.table = opts.table393r.db_table = SCHEMA[opts.table].virtual ? opts.table394r.account_id = opts.account_id395r.project_id = opts.project_id396397s = SCHEMA[opts.table]398399if opts.account_id?400r.client_query = s?.user_query401else402r.client_query = s?.project_query403404if not r.client_query?.set?.fields?405return {err:"FATAL: user set queries not allowed for table '#{opts.table}'"}406407if not @_mod_fields(opts.query, r.client_query)408dbg("shortcut -- no fields will be modified, so nothing to do")409return410411for field in misc.keys(r.client_query.set.fields)412if r.client_query.set.fields[field] == undefined413return {err: "FATAL: user set query not allowed for #{opts.table}.#{field}"}414val = r.client_query.set.fields[field]415416if typeof(val) == 'function'417try418r.query[field] = val(r.query, @)419catch err420return {err:"FATAL: error setting '#{field}' -- #{err}"}421else422switch val423when 'account_id'424if not r.account_id?425return {err: "FATAL: account_id must be specified -- make sure you are signed in"}426r.query[field] = r.account_id427when 'project_id'428if not r.project_id?429return {err: "FATAL: project_id must be specified"}430r.query[field] = r.project_id431when 'time_id'432r.query[field] = uuid.v1()433when 'project_write'434if not r.query[field]?435return {err: "FATAL: must specify #{opts.table}.#{field}"}436r.require_project_ids_write_access = [r.query[field]]437when 'project_owner'438if not r.query[field]?439return {err:"FATAL: must specify #{opts.table}.#{field}"}440r.require_project_ids_owner = [r.query[field]]441442if r.client_query.set.admin443r.require_admin = true444445r.primary_keys = @_primary_keys(r.db_table)446447r.json_fields = @_json_fields(r.db_table, r.query)448449for k, v of r.query450if k in r.primary_keys451continue452if r.client_query?.set?.fields?[k] != undefined453continue454if s.admin_query?.set?.fields?[k] != undefined455r.require_admin = true456continue457return {err: "FATAL: changing #{r.table}.#{k} not allowed"}458459# HOOKS which allow for running arbitrary code in response to460# user set queries. In each case, new_val below is only the part461# of the object that the user requested to change.462463# 0. CHECK: Runs before doing any further processing; has callback, so this464# provides a generic way to quickly check whether or not this query is allowed465# for things that can't be done declaratively. The check_hook can also466# mutate the obj (the user query), e.g., to enforce limits on input size.467r.check_hook = r.client_query.set.check_hook468469# 1. BEFORE: If before_change is set, it is called with input470# (database, old_val, new_val, account_id, cb)471# before the actual change to the database is made.472r.before_change_hook = r.client_query.set.before_change473474# 2. INSTEAD OF: If instead_of_change is set, then instead_of_change_hook475# is called with input476# (database, old_val, new_val, account_id, cb)477# *instead* of actually doing the update/insert to478# the database. This makes it possible to run arbitrary479# code whenever the user does a certain type of set query.480# Obviously, if that code doesn't set the new_val in the481# database, then new_val won't be the new val.482r.instead_of_change_hook = r.client_query.set.instead_of_change483484# 3. AFTER: If set, the on_change_hook is called with485# (database, old_val, new_val, account_id, cb)486# after everything the database has been modified.487r.on_change_hook = r.client_query.set.on_change488489# 4. instead of query490r.instead_of_query = r.client_query.set.instead_of_query491492#dbg("on_change_hook=#{on_change_hook?}, #{misc.to_json(misc.keys(client_query.set))}")493494# Set the query options -- order doesn't matter for set queries (unlike for get), so we495# just merge the options into a single dictionary.496# NOTE: As I write this, there is just one supported option: {delete:true}.497r.options = {}498if r.client_query.set.options?499for x in r.client_query.set.options500for y, z of x501r.options[y] = z502if opts.options?503for x in opts.options504for y, z of x505r.options[y] = z506dbg("options = #{misc.to_json(r.options)}")507508if r.options.delete and not r.client_query.set.delete509# delete option is set, but deletes aren't explicitly allowed on this table. ERROR.510return {err: "FATAL: delete from #{r.table} not allowed"}511512return r513514_user_set_query_enforce_requirements: (r, cb) =>515async.parallel([516(cb) =>517if r.require_admin518@_require_is_admin(r.account_id, cb)519else520cb()521(cb) =>522if r.require_project_ids_write_access?523if r.project_id?524err = undefined525for x in r.require_project_ids_write_access526if x != r.project_id527err = "FATAL: can only query same project"528break529cb(err)530else531@_require_project_ids_in_groups(r.account_id, r.require_project_ids_write_access,\532['owner', 'collaborator'], cb)533else534cb()535(cb) =>536if r.require_project_ids_owner?537@_require_project_ids_in_groups(r.account_id, r.require_project_ids_owner,\538['owner'], cb)539else540cb()541], cb)542543_user_set_query_where: (r) =>544where = {}545for primary_key in @_primary_keys(r.db_table)546value = r.query[primary_key]547if SCHEMA[r.db_table].fields[primary_key].noCoerce548where["#{primary_key}=$"] = value549else550type = pg_type(SCHEMA[r.db_table].fields[primary_key])551if type == 'TIMESTAMP' and not misc.is_date(value)552# Javascript is better at parsing its own dates than PostgreSQL553# isNaN test so NOW(), etc. work still554x = new Date(value)555if not isNaN(x)556value = x557where["#{primary_key}=$::#{type}"] = value558return where559560_user_set_query_values: (r) =>561values = {}562s = SCHEMA[r.db_table]563for key, value of r.query564type = pg_type(s?.fields?[key])565if value? and type? and not s?.fields?[key]?.noCoerce566if type == 'TIMESTAMP' and not misc.is_date(value)567# (as above) Javascript is better at parsing its own dates than PostgreSQL568x = new Date(value)569if not isNaN(x)570value = x571values["#{key}::#{type}"] = value572else573values[key] = value574return values575576_user_set_query_hooks_prepare: (r, cb) =>577if r.on_change_hook? or r.before_change_hook? or r.instead_of_change_hook?578for primary_key in r.primary_keys579if not r.query[primary_key]?580# this is fine -- it just means the old_val isn't defined.581# this can happen, e.g., when creating a new object with a primary key that is a generated id.582cb()583return584# get the old value before changing it585# TODO: optimization -- can we restrict columns below?586@_query587query : "SELECT * FROM #{r.db_table}"588where : @_user_set_query_where(r)589cb : one_result (err, x) =>590r.old_val = x; cb(err)591else592cb()593594_user_query_set_count: (r, cb) =>595@_query596query : "SELECT COUNT(*) FROM #{r.db_table}"597where : @_user_set_query_where(r)598cb : count_result(cb)599600_user_query_set_delete: (r, cb) =>601@_query602query : "DELETE FROM #{r.db_table}"603where : @_user_set_query_where(r)604cb : cb605606_user_set_query_conflict: (r) =>607return r.primary_keys608609_user_query_set_upsert: (r, cb) =>610# r.dbg("_user_query_set_upsert #{JSON.stringify(r.query)}")611@_query612query : "INSERT INTO #{r.db_table}"613values : @_user_set_query_values(r)614conflict : @_user_set_query_conflict(r)615cb : cb616617# Record is already in DB, so we update it:618# this function handles a case that involves both619# a jsonb_merge and an update.620_user_query_set_upsert_and_jsonb_merge: (r, cb) =>621jsonb_merge = {}622for k of r.json_fields623v = r.query[k]624if v?625jsonb_merge[k] = v626set = {}627for k, v of r.query628if k not in r.primary_keys and not jsonb_merge[k]?629set[k] = v630@_query631query : "UPDATE #{r.db_table}"632jsonb_merge : jsonb_merge633set : set634where : @_user_set_query_where(r)635cb : cb636637_user_set_query_main_query: (r, cb) =>638r.dbg("_user_set_query_main_query")639640if not r.client_query.set.allow_field_deletes641# allow_field_deletes not set, so remove any null/undefined642# fields from the query643for key of r.query644if not r.query[key]?645delete r.query[key]646647if r.options.delete648for primary_key in r.primary_keys649if not r.query[primary_key]?650cb("FATAL: delete query must set primary key")651return652r.dbg("delete based on primary key")653@_user_query_set_delete(r, cb)654return655if r.instead_of_change_hook?656r.instead_of_change_hook(@, r.old_val, r.query, r.account_id, cb)657else658if misc.len(r.json_fields) == 0659# easy case -- there are no jsonb merge fields; just do an upsert.660@_user_query_set_upsert(r, cb)661return662# HARD CASE -- there are json_fields... so we are doing an insert663# if the object isn't already in the database, and an update664# if it is. This is ugly because I don't know how to do both665# a JSON merge as an upsert.666cnt = undefined # will equal number of records having the primary key (so 0 or 1)667async.series([668(cb) =>669@_user_query_set_count r, (err, n) =>670cnt = n; cb(err)671(cb) =>672r.dbg("do the set query")673if cnt == 0674# Just insert (do as upsert to avoid error in case of race)675@_user_query_set_upsert(r, cb)676else677# Do as an update -- record is definitely already in db since cnt > 0.678# This would fail in the unlikely (but possible) case that somebody deletes679# the record between the above count and when we do the UPDATE.680# Using a transaction could avoid this.681# Maybe such an error is reasonable and it's good to report it as such.682@_user_query_set_upsert_and_jsonb_merge(r, cb)683], cb)684685user_set_query: (opts) =>686opts = defaults opts,687account_id : undefined688project_id : undefined689table : required690query : required691options : undefined # options=[{delete:true}] is the only supported nontrivial option here.692cb : required # cb(err)693694# TODO: it would be nice to return the primary key part of the created object on creation.695# That's not implemented and will be somewhat nontrivial, and will use the RETURNING clause696# of postgres's INSERT - https://www.postgresql.org/docs/current/sql-insert.html697698if @is_standby699opts.cb("set queries against standby not allowed")700return701r = @_parse_set_query_opts(opts)702703# Only uncomment for debugging -- too big/verbose/dangerous704# r.dbg("parsed query opts = #{JSON.stringify(r)}")705706if not r? # nothing to do707opts.cb()708return709if r.err710opts.cb(r.err)711return712713async.series([714(cb) =>715@_user_set_query_enforce_requirements(r, cb)716(cb) =>717if r.check_hook?718r.check_hook(@, r.query, r.account_id, r.project_id, cb)719else720cb()721(cb) =>722@_user_set_query_hooks_prepare(r, cb)723(cb) =>724if r.before_change_hook?725r.before_change_hook @, r.old_val, r.query, r.account_id, (err, stop) =>726r.done = stop727cb(err)728else729cb()730(cb) =>731if r.done732cb()733return734if r.instead_of_query?735opts1 = misc.copy_without(opts, ['cb', 'changes', 'table'])736r.instead_of_query(@, opts1, cb)737else738@_user_set_query_main_query(r, cb)739(cb) =>740if r.done741cb()742return743if r.on_change_hook?744r.on_change_hook(@, r.old_val, r.query, r.account_id, cb)745else746cb()747], (err) => opts.cb(err))748749# mod_fields counts the fields in query that might actually get modified750# in the database when we do the query; e.g., account_id won't since it gets751# filled in with the user's account_id, and project_write won't since it must752# refer to an existing project. We use mod_field **only** to skip doing753# no-op queries. It's just an optimization.754_mod_fields: (query, client_query) =>755for field in misc.keys(query)756if client_query.set.fields[field] not in ['account_id', 'project_write']757return true758return false759760_user_get_query_json_timestamps: (obj, fields) =>761# obj is an object returned from the database via a query762# Postgres JSONB doesn't support timestamps, so we convert763# every json leaf node of obj that looks like JSON of a timestamp764# to a Javascript Date.765for k, v of obj766if fields[k]767obj[k] = misc.fix_json_dates(v, fields[k])768769# fill in the default values for obj using the client_query spec.770_user_get_query_set_defaults: (client_query, obj, fields) =>771if not misc.is_array(obj)772obj = [obj]773else if obj.length == 0774return775s = client_query?.get?.fields ? {}776for k in fields777v = s[k]778if v?779# k is a field for which a default value (=v) is provided in the schema780for x in obj781# For each obj pulled from the database that is defined...782if x?783# We check to see if the field k was set on that object.784y = x[k]785if not y?786# It was NOT set, so we deep copy the default value for the field k.787x[k] = misc.deep_copy(v)788else if typeof(v) == 'object' and typeof(y) == 'object' and not misc.is_array(v)789# y *is* defined and is an object, so we merge in the provided defaults.790for k0, v0 of v791if not y[k0]?792y[k0] = v0793794_user_set_query_project_users: (obj, account_id) =>795dbg = @_dbg("_user_set_query_project_users")796if not obj.users?797# nothing to do -- not changing users.798return799##dbg("disabled")800##return obj.users801# - ensures all keys of users are valid uuid's (though not that they are valid users).802# - and format is:803# {group:'owner' or 'collaborator', hide:bool, upgrades:{a map}}804# with valid upgrade fields.805upgrade_fields = PROJECT_UPGRADES.params806users = {}807# TODO: we obviously should check that a user is only changing the part808# of this object involving themselves... or adding/removing collaborators.809# That is not currently done below. TODO TODO TODO SECURITY.810for id, x of obj.users811if misc.is_valid_uuid_string(id)812for key in misc.keys(x)813if key not in ['group', 'hide', 'upgrades', 'ssh_keys']814throw Error("unknown field '#{key}")815if x.group? and (x.group not in ['owner', 'collaborator'])816throw Error("invalid value for field 'group'")817if x.hide? and typeof(x.hide) != 'boolean'818throw Error("invalid type for field 'hide'")819if x.upgrades?820if not misc.is_object(x.upgrades)821throw Error("invalid type for field 'upgrades'")822for k,_ of x.upgrades823if not upgrade_fields[k]824throw Error("invalid upgrades field '#{k}'")825if x.ssh_keys826# do some checks.827if not misc.is_object(x.ssh_keys)828throw Error("ssh_keys must be an object")829for fingerprint, key of x.ssh_keys830if not key # deleting831continue832if not misc.is_object(key)833throw Error("each key in ssh_keys must be an object")834for k, v of key835# the two dates are just numbers not actual timestamps...836if k not in ['title', 'value', 'creation_date', 'last_use_date']837throw Error("invalid ssh_keys field '#{k}'")838users[id] = x839return users840841project_action: (opts) =>842opts = defaults opts,843project_id : required844action_request : required # action is object {action:?, time:?}845cb : required846if opts.action_request.action == 'test'847# used for testing -- shouldn't trigger anything to happen.848opts.cb()849return850dbg = @_dbg("project_action(project_id='#{opts.project_id}',action_request=#{misc.to_json(opts.action_request)})")851dbg()852project = undefined853action_request = misc.copy(opts.action_request)854set_action_request = (cb) =>855dbg("set action_request to #{misc.to_json(action_request)}")856@_query857query : "UPDATE projects"858where : 'project_id = $::UUID':opts.project_id859jsonb_set : {action_request : action_request}860cb : cb861async.series([862(cb) =>863action_request.started = new Date()864set_action_request(cb)865(cb) =>866dbg("get project")867try868project = await @projectControl(opts.project_id)869cb()870catch err871cb(err)872(cb) =>873dbg("doing action")874try875switch action_request.action876when 'restart'877await project.restart()878when 'stop'879await project.stop()880when 'start'881await project.start()882else883throw Error("FATAL: action '#{opts.action_request.action}' not implemented")884cb()885catch err886cb(err)887], (err) =>888if err889action_request.err = err890action_request.finished = new Date()891dbg("finished!")892set_action_request(opts.cb)893)894895# This hook is called *before* the user commits a change to a project in the database896# via a user set query.897# TODO: Add a pre-check here as well that total upgrade isn't going to be exceeded.898# This will avoid a possible subtle edge case if user is cheating and always somehow899# crashes server...?900_user_set_query_project_change_before: (old_val, new_val, account_id, cb) =>901#dbg = @_dbg("_user_set_query_project_change_before #{account_id}, #{misc.to_json(old_val)} --> #{misc.to_json(new_val)}")902# I've seen MASSIVE OUTPUT from this, e.g., when setting avatar.903dbg = @_dbg("_user_set_query_project_change_before #{account_id}")904dbg()905906if new_val?.name and (new_val?.name != old_val?.name)907# Changing or setting the name of the project to something nontrivial.908try909checkProjectName(new_val.name);910catch err911cb(err.toString())912return913if new_val.name914# Setting name to something nontrivial, so we must check uniqueness915# among all projects this user owns.916result = await callback2 @_query,917query : 'SELECT COUNT(*) FROM projects'918where :919"users#>>'{#{account_id},group}' = $::TEXT" : 'owner'920"project_id != $::UUID" : new_val.project_id921"LOWER(name) = $::TEXT":new_val.name.toLowerCase()922if result.rows[0].count > 0923cb("There is already a project with the same owner as this project and name='#{new_val.name}'. Names are not case sensitive.")924return925# A second constraint is that only the project owner can change the project name.926result = await callback2 @_query,927query : 'SELECT COUNT(*) FROM projects'928where :929"users#>>'{#{account_id},group}' = $::TEXT" : 'owner'930"project_id = $::UUID" : new_val.project_id931if result.rows[0].count == 0932cb("Only the owner of the project can currently change the project name.")933return934935if new_val?.action_request? and JSON.stringify(new_val.action_request.time) != JSON.stringify(old_val?.action_request?.time)936# Requesting an action, e.g., save, restart, etc.937dbg("action_request -- #{misc.to_json(new_val.action_request)}")938#939# WARNING: Above, we take the difference of times below, since != doesn't work as we want with940# separate Date objects, as it will say equal dates are not equal. Example:941# coffee> x = JSON.stringify(new Date()); {from_json}=require('misc'); a=from_json(x); b=from_json(x); [a!=b, a-b]942# [ true, 0 ]943944# Launch the action -- success or failure communicated back to all clients through changes to state.945# Also, we don't have to worry about permissions here; that this function got called at all means946# the user has write access to the projects table entry with given project_id, which gives them permission947# to do any action with the project.948@project_action949project_id : new_val.project_id950action_request : misc.copy_with(new_val.action_request, ['action', 'time'])951cb : (err) =>952dbg("action_request #{misc.to_json(new_val.action_request)} completed -- #{err}")953# true means -- do nothing further. We don't want to the user to954# set this same thing since we already dealt with it properly.955cb(err, true)956return957958if not new_val.users? # not changing users959cb(); return960old_val = old_val?.users ? {}961new_val = new_val?.users ? {}962for id in misc.keys(old_val).concat(new_val)963if account_id != id964# make sure user doesn't change anybody else's allocation965if not lodash.isEqual(old_val?[id]?.upgrades, new_val?[id]?.upgrades)966err = "FATAL: user '#{account_id}' tried to change user '#{id}' allocation toward a project"967dbg(err)968cb(err)969return970cb()971972# This hook is called *after* the user commits a change to a project in the database973# via a user set query. It could undo changes the user isn't allowed to make, which974# might require doing various async calls, or take actions (e.g., setting quotas,975# starting projects, etc.).976_user_set_query_project_change_after: (old_val, new_val, account_id, cb) =>977dbg = @_dbg("_user_set_query_project_change_after #{account_id}, #{misc.to_json(old_val)} --> #{misc.to_json(new_val)}")978dbg()979old_upgrades = old_val.users?[account_id]?.upgrades980new_upgrades = new_val.users?[account_id]?.upgrades981if new_upgrades? and not lodash.isEqual(old_upgrades, new_upgrades)982dbg("upgrades changed for #{account_id} from #{misc.to_json(old_upgrades)} to #{misc.to_json(new_upgrades)}")983project = undefined984async.series([985(cb) =>986@ensure_user_project_upgrades_are_valid987account_id : account_id988cb : cb989(cb) =>990if not @projectControl?991cb()992else993dbg("get project")994try995project = await @projectControl(new_val.project_id)996cb()997catch err998cb(err)999(cb) =>1000if not project?1001cb()1002else1003dbg("determine total quotas and apply")1004try1005await project.setAllQuotas()1006cb()1007catch err1008cb(err)1009], cb)1010else1011cb()10121013###1014GET QUERIES1015###10161017# Make any functional substitutions defined by the schema.1018# This may mutate query in place.1019_user_get_query_functional_subs: (query, fields) =>1020if fields?1021for field, val of fields1022if typeof(val) == 'function'1023query[field] = val(query, @)10241025_parse_get_query_opts: (opts) =>1026if opts.changes? and not opts.changes.cb?1027return {err: "FATAL: user_get_query -- if opts.changes is specified, then opts.changes.cb must also be specified"}10281029r = {}1030# get data about user queries on this table1031if opts.project_id?1032r.client_query = SCHEMA[opts.table]?.project_query1033else1034r.client_query = SCHEMA[opts.table]?.user_query10351036if not r.client_query?.get?1037return {err: "FATAL: get queries not allowed for table '#{opts.table}'"}10381039if not opts.account_id? and not opts.project_id? and not SCHEMA[opts.table].anonymous1040return {err: "FATAL: anonymous get queries not allowed for table '#{opts.table}'"}10411042r.table = SCHEMA[opts.table].virtual ? opts.table10431044r.primary_keys = @_primary_keys(opts.table)10451046# Are only admins allowed any get access to this table?1047r.require_admin = !!r.client_query.get.admin10481049# Verify that all requested fields may be read by users1050for field in misc.keys(opts.query)1051if r.client_query.get.fields?[field] == undefined1052return {err: "FATAL: user get query not allowed for #{opts.table}.#{field}"}10531054# Functional substitutions defined by schema1055@_user_get_query_functional_subs(opts.query, r.client_query.get?.fields)10561057if r.client_query.get?.instead_of_query?1058return r10591060# Sanity check: make sure there is something in the query1061# that gets only things in this table that this user1062# is allowed to see, or at least a check_hook. This is not required1063# for admins.1064if not r.client_query.get.pg_where? and not r.client_query.get.check_hook? and not r.require_admin1065return {err: "FATAL: user get query not allowed for #{opts.table} (no getAll filter - pg_where or check_hook)"}10661067# Apply default options to the get query (don't impact changefeed)1068# The user can override these, e.g., if they were to want to explicitly increase a limit1069# to get more file use history.1070user_options = {}1071for x in opts.options1072for y, z of x1073user_options[y] = true10741075get_options = undefined1076if @is_heavily_loaded() and r.client_query.get.options_load?1077get_options = r.client_query.get.options_load1078else if r.client_query.get.options?1079get_options = r.client_query.get.options1080if get_options?1081# complicated since options is a list of {opt:val} !1082for x in get_options1083for y, z of x1084if not user_options[y]1085opts.options.push(x)1086break10871088r.json_fields = @_json_fields(opts.table, opts.query)1089return r10901091# _json_fields: map from field names to array of fields that should be parsed as timestamps1092# These keys of his map are also used by _user_query_set_upsert_and_jsonb_merge to determine1093# JSON deep merging for set queries.1094_json_fields: (table, query) =>1095json_fields = {}1096for field, info of SCHEMA[table].fields1097if (query[field]? or query[field] == null) and (info.type == 'map' or info.pg_type == 'JSONB')1098json_fields[field] = info.date ? []1099return json_fields11001101_user_get_query_where: (client_query, account_id, project_id, user_query, table, cb) =>1102dbg = @_dbg("_user_get_query_where")1103dbg()11041105pg_where = client_query.get.pg_where11061107if @is_heavily_loaded() and client_query.get.pg_where_load?1108# use a different query if load is heavy1109pg_where = client_query.get.pg_where_load11101111if not pg_where?1112pg_where = []1113if pg_where == 'projects'1114pg_where = ['projects']11151116if typeof(pg_where) == 'function'1117pg_where = pg_where(user_query, @)1118if not misc.is_array(pg_where)1119cb("FATAL: pg_where must be an array (of strings or objects)")1120return11211122# Do NOT mutate the schema itself!1123pg_where = misc.deep_copy(pg_where)11241125# expand 'projects' in query, depending on whether project_id is specified or not.1126# This is just a convenience to make the db schema simpler.1127for i in [0...pg_where.length]1128if pg_where[i] == 'projects'1129if user_query.project_id1130pg_where[i] = {"project_id = $::UUID" : 'project_id'}1131else1132pg_where[i] = {"project_id = ANY(select project_id from projects where users ? $::TEXT)" : 'account_id'}11331134# Now we fill in all the parametrized substitutions in the pg_where list.1135subs = {}1136for x in pg_where1137if misc.is_object(x)1138for _, value of x1139subs[value] = value11401141sub_value = (value, cb) =>1142switch value1143when 'account_id'1144if not account_id?1145cb('FATAL: account_id must be given')1146return1147subs[value] = account_id1148cb()1149when 'project_id'1150if project_id?1151subs[value] = project_id1152cb()1153else if not user_query.project_id1154cb("FATAL: must specify project_id")1155else if SCHEMA[table].anonymous1156subs[value] = user_query.project_id1157cb()1158else1159@user_is_in_project_group1160account_id : account_id1161project_id : user_query.project_id1162groups : ['owner', 'collaborator']1163cb : (err, in_group) =>1164if err1165cb(err)1166else if in_group1167subs[value] = user_query.project_id1168cb()1169else1170cb("FATAL: you do not have read access to this project -- account_id=#{account_id}, project_id_=#{project_id}")1171when 'project_id-public'1172if not user_query.project_id?1173cb("FATAL: must specify project_id")1174else1175if SCHEMA[table].anonymous1176@has_public_path1177project_id : user_query.project_id1178cb : (err, has_public_path) =>1179if err1180cb(err)1181else if not has_public_path1182cb("project does not have any public paths")1183else1184subs[value] = user_query.project_id1185cb()1186else1187cb("FATAL: table must allow anonymous queries")1188else1189cb()11901191async.map misc.keys(subs), sub_value, (err) =>1192if err1193cb(err)1194return1195for x in pg_where1196if misc.is_object(x)1197for key, value of x1198x[key] = subs[value]11991200# impose further restrictions (more where conditions)1201pg_where.push(userGetQueryFilter(user_query, client_query))12021203cb(undefined, pg_where)12041205_user_get_query_options: (options, multi, schema_options) =>1206r = {}12071208if schema_options?1209options = options.concat(schema_options)12101211# Parse option part of the query1212{limit, order_by, slice, only_changes, err} = @_query_parse_options(options)12131214if err1215return {err: err}1216if only_changes1217r.only_changes = true1218if limit?1219r.limit = limit1220else if not multi1221r.limit = 11222if order_by?1223r.order_by = order_by1224if slice?1225return {err: "slice not implemented"}1226return r12271228_user_get_query_do_query: (query_opts, client_query, user_query, multi, json_fields, cb) =>1229query_opts.cb = all_results (err, x) =>1230if err1231cb(err)1232else1233if misc.len(json_fields) > 01234# Convert timestamps to Date objects, if **explicitly** specified in the schema1235for obj in x1236@_user_get_query_json_timestamps(obj, json_fields)12371238if not multi1239x = x[0]1240# Fill in default values and remove null's1241@_user_get_query_set_defaults(client_query, x, misc.keys(user_query))1242# Get rid of undefined fields -- that's the default and wastes memory and bandwidth1243if x?1244for obj in x1245misc.map_mutate_out_undefined_and_null(obj)1246cb(undefined, x)1247@_query(query_opts)12481249_user_get_query_query: (table, user_query, remove_from_query) =>1250return "SELECT #{(quote_field(field) for field in @_user_get_query_columns(user_query, remove_from_query)).join(',')} FROM #{table}"12511252_user_get_query_satisfied_by_obj: (user_query, obj, possible_time_fields) =>1253#dbg = @_dbg("_user_get_query_satisfied_by_obj)1254#dbg(user_query, obj)1255for field, value of obj1256date_keys = possible_time_fields[field]1257if date_keys1258value = misc.fix_json_dates(value, date_keys)1259if (q = user_query[field])?1260if (op = queryIsCmp(q))1261#dbg(value:value, op: op, q:q)1262x = q[op]1263switch op1264when '=='1265if value != x1266return false1267when '!='1268if value == x1269return false1270when '>='1271if value < x1272return false1273when '<='1274if value > x1275return false1276when '>'1277if value <= x1278return false1279when '<'1280if value >= x1281return false1282else if value != q1283return false1284return true12851286_user_get_query_handle_field_deletes: (client_query, new_val) =>1287if client_query.get.allow_field_deletes1288# leave in the nulls that might be in new_val1289return1290# remove all nulls from new_val. Right now we1291# just can't support this due to default values.1292# TODO: completely get rid of default values (?) or1293# maybe figure out how to implement this. The symptom1294# of not doing this is a normal user will do things like1295# delete the users field of their projects. Not good.1296for key of new_val1297if not new_val[key]?1298delete new_val[key]12991300_user_get_query_changefeed: (changes, table, primary_keys, user_query,1301where, json_fields, account_id, client_query, orig_table, cb) =>1302dbg = @_dbg("_user_get_query_changefeed(table='#{table}')")1303dbg()1304# WARNING: always call changes.cb! Do not do something like f = changes.cb, then call f!!!!1305# This is because the value of changes.cb may be changed by the caller.1306if not misc.is_object(changes)1307cb("FATAL: changes must be an object with keys id and cb")1308return1309if not misc.is_valid_uuid_string(changes.id)1310cb("FATAL: changes.id must be a uuid")1311return1312if typeof(changes.cb) != 'function'1313cb("FATAL: changes.cb must be a function")1314return1315for primary_key in primary_keys1316if not user_query[primary_key]? and user_query[primary_key] != null1317cb("FATAL: changefeed MUST include primary key (='#{primary_key}') in query")1318return1319watch = []1320select = {}1321init_tracker = tracker = free_tracker = undefined1322possible_time_fields = misc.deep_copy(json_fields)1323feed = undefined13241325changefeed_keys = SCHEMA[orig_table]?.changefeed_keys ? SCHEMA[table]?.changefeed_keys ? []1326for field, val of user_query1327type = pg_type(SCHEMA[table]?.fields?[field])1328if type == 'TIMESTAMP'1329possible_time_fields[field] = 'all'1330if val == null and field not in primary_keys and field not in changefeed_keys1331watch.push(field)1332else1333select[field] = type13341335if misc.len(possible_time_fields) > 01336# Convert (likely) timestamps to Date objects; fill in defaults for inserts1337process = (x) =>1338if not x?1339return1340if x.new_val?1341@_user_get_query_json_timestamps(x.new_val, possible_time_fields)1342if x.action == 'insert' # do not do this for delete or update actions!1343@_user_get_query_set_defaults(client_query, x.new_val, misc.keys(user_query))1344else if x.action == 'update'1345@_user_get_query_handle_field_deletes(client_query, x.new_val)1346if x.old_val?1347@_user_get_query_json_timestamps(x.old_val, possible_time_fields)1348else1349process = (x) =>1350if not x?1351return1352if x.new_val?1353if x.action == 'insert' # do not do this for delete or update actions!1354@_user_get_query_set_defaults(client_query, x.new_val, misc.keys(user_query))1355else if x.action == 'update'1356@_user_get_query_handle_field_deletes(client_query, x.new_val)13571358async.series([1359(cb) =>1360# check for alternative where test for changefeed.1361pg_changefeed = client_query?.get?.pg_changefeed1362if not pg_changefeed?1363cb(); return13641365if pg_changefeed == 'projects'1366tracker_add = (project_id) => feed.insert({project_id:project_id})1367tracker_remove = (project_id) => feed.delete({project_id:project_id})13681369# Any tracker error means this changefeed is now broken and1370# has to be recreated.1371tracker_error = () => changes.cb("tracker error - ${err}")13721373pg_changefeed = (db, account_id) =>1374where : (obj) =>1375# Check that this is a project we have read access to1376if not db._project_and_user_tracker?.get_projects(account_id)[obj.project_id]1377return false1378# Now check our actual query conditions on the object.1379# This would normally be done by the changefeed, but since1380# we are passing in a custom where, we have to do it.1381if not @_user_get_query_satisfied_by_obj(user_query, obj, possible_time_fields)1382return false1383return true13841385select : {'project_id':'UUID'}13861387init_tracker : (tracker) =>1388tracker.on "add_user_to_project-#{account_id}", tracker_add1389tracker.on "remove_user_from_project-#{account_id}", tracker_remove1390tracker.once 'error', tracker_error139113921393free_tracker : (tracker) =>1394dbg("freeing project tracker events")1395tracker.removeListener("add_user_to_project-#{account_id}", tracker_add)1396tracker.removeListener("remove_user_from_project-#{account_id}", tracker_remove)1397tracker.removeListener("error", tracker_error)139813991400else if pg_changefeed == 'news'1401pg_changefeed = ->1402where : (obj) ->1403if obj.date?1404date_obj = new Date(obj.date)1405# we send future news items to the frontend, but filter it based on the server time1406return date_obj >= misc.months_ago(3)1407else1408return true1409select : {id: 'SERIAL UNIQUE', date: 'TIMESTAMP'}14101411else if pg_changefeed == 'one-hour'1412pg_changefeed = ->1413where : (obj) ->1414if obj.time?1415return new Date(obj.time) >= misc.hours_ago(1)1416else1417return true1418select : {id:'UUID', time:'TIMESTAMP'}14191420else if pg_changefeed == 'five-minutes'1421pg_changefeed = ->1422where : (obj) ->1423if obj.time?1424return new Date(obj.time) >= misc.minutes_ago(5)1425else1426return true1427select : {id:'UUID', time:'TIMESTAMP'}14281429else if pg_changefeed == 'collaborators'1430if not account_id?1431cb("FATAL: account_id must be given")1432return1433tracker_add = (collab_id) => feed.insert({account_id:collab_id})1434tracker_remove = (collab_id) => feed.delete({account_id:collab_id})1435tracker_error = () => changes.cb("tracker error - ${err}")1436pg_changefeed = (db, account_id) ->1437shared_tracker = undefined1438where : (obj) -> # test of "is a collab with me"1439return shared_tracker.get_collabs(account_id)?[obj.account_id]1440init_tracker : (tracker) =>1441shared_tracker = tracker1442tracker.on "add_collaborator-#{account_id}", tracker_add1443tracker.on "remove_collaborator-#{account_id}", tracker_remove1444tracker.once 'error', tracker_error1445free_tracker : (tracker) =>1446dbg("freeing collab tracker events")1447tracker.removeListener("add_collaborator-#{account_id}", tracker_add)1448tracker.removeListener("remove_collaborator-#{account_id}", tracker_remove)1449tracker.removeListener("error", tracker_error)145014511452x = pg_changefeed(@, account_id)1453if x.init_tracker?1454init_tracker = x.init_tracker1455if x.free_tracker?1456free_tracker = x.free_tracker1457if x.select?1458for k, v of x.select1459select[k] = v14601461if x.where? or x.init_tracker?1462where = x.where1463if not account_id?1464cb()1465return1466# initialize user tracker is needed for where tests...1467@project_and_user_tracker cb : (err, _tracker) =>1468if err1469cb(err)1470else1471tracker = _tracker1472try1473await tracker.register(account_id)1474cb()1475catch err1476cb(err)1477else1478cb()1479(cb) =>1480@changefeed1481table : table1482select : select1483where : where1484watch : watch1485cb : (err, _feed) =>1486# there *is* a glboal variable feed that we set here:1487feed = _feed1488if err1489cb(err)1490return1491feed.on 'change', (x) ->1492process(x)1493changes.cb(undefined, x)1494feed.on 'close', ->1495changes.cb(undefined, {action:'close'})1496dbg("feed close")1497if tracker? and free_tracker?1498dbg("free_tracker")1499free_tracker(tracker)1500else1501dbg("do NOT free_tracker")1502feed.on 'error', (err) ->1503changes.cb("feed error - #{err}")1504@_changefeeds ?= {}1505@_changefeeds[changes.id] = feed1506init_tracker?(tracker)1507cb()1508], cb)15091510user_get_query: (opts) =>1511opts = defaults opts,1512account_id : undefined1513project_id : undefined1514table : required1515query : required1516multi : required1517options : required # used for initial query; **IGNORED** by changefeed,1518# which ensures that *something* is sent every n minutes, in case no1519# changes are coming out of the changefeed. This is an additional1520# measure in case the client somehow doesn't get a "this changefeed died" message.1521# Use [{delete:true}] to instead delete the selected records (must1522# have delete:true in schema).1523changes : undefined # {id:?, cb:?}1524cb : required # cb(err, result)1525###1526The general idea is that user get queries are of the form15271528SELECT [columns] FROM table WHERE [get_all] AND [further restrictions] LIMIT/slice15291530Using the whitelist rules specified in SCHEMA, we1531determine each of the above, then run the query.15321533If no error in query, and changes is a given uuid, set up a change1534feed that calls opts.cb on changes as well.1535###1536id = misc.uuid().slice(0,6)1537#dbg = @_dbg("user_get_query(id=#{id})")1538dbg = -> # Logging below is just too verbose, and turns out to not be useful...1539dbg("account_id='#{opts.account_id}', project_id='#{opts.project_id}', query=#{misc.to_json(opts.query)}, multi=#{opts.multi}, options=#{misc.to_json(opts.options)}, changes=#{misc.to_json(opts.changes)}")1540{err, table, client_query, require_admin, primary_keys, json_fields} = @_parse_get_query_opts(opts)15411542if err1543dbg("error parsing query opts -- #{err}")1544opts.cb(err)1545return15461547_query_opts = {} # this will be the input to the @_query command.1548locals =1549result : undefined1550changes_cb : undefined15511552async.series([1553(cb) =>1554if client_query.get.check_hook?1555dbg("do check hook")1556client_query.get.check_hook(@, opts.query, opts.account_id, opts.project_id, cb)1557else1558cb()1559(cb) =>1560if require_admin1561dbg('require admin')1562@_require_is_admin(opts.account_id, cb)1563else1564cb()1565(cb) =>1566# NOTE: _user_get_query_where may mutate opts.query (for 'null' params)1567# so it is important that this is called before @_user_get_query_query below.1568# See the TODO in userGetQueryFilter.1569dbg("get_query_where")1570@_user_get_query_where client_query, opts.account_id, opts.project_id, opts.query, opts.table, (err, where) =>1571_query_opts.where = where1572cb(err)1573(cb) =>1574if client_query.get.instead_of_query?1575cb();1576return1577_query_opts.query = @_user_get_query_query(table, opts.query, client_query.get.remove_from_query)1578x = @_user_get_query_options(opts.options, opts.multi, client_query.options)1579if x.err1580dbg("error in get_query_options, #{x.err}")1581cb(x.err)1582return1583misc.merge(_query_opts, x)15841585nestloop = SCHEMA[opts.table]?.pg_nestloop # true, false or undefined1586if typeof nestloop == 'boolean'1587val = if nestloop then 'on' else 'off'1588_query_opts.pg_params = {enable_nestloop : val}15891590indexscan = SCHEMA[opts.table]?.pg_indexscan # true, false or undefined1591if typeof indexscan == 'boolean'1592val = if indexscan then 'on' else 'off'1593_query_opts.pg_params = {enable_indexscan : val}15941595if opts.changes?1596locals.changes_cb = opts.changes.cb1597locals.changes_queue = []1598# see note about why we do the following at the bottom of this file1599opts.changes.cb = (err, obj) ->1600locals.changes_queue.push({err:err, obj:obj})1601dbg("getting changefeed")1602@_user_get_query_changefeed(opts.changes, table, primary_keys,1603opts.query, _query_opts.where, json_fields,1604opts.account_id, client_query, opts.table,1605cb)1606else1607cb()16081609(cb) =>1610if client_query.get.instead_of_query?1611if opts.changes?1612cb("changefeeds are not supported for querying this table")1613return1614# Custom version: instead of doing a full query, we instead1615# call a function and that's it.1616dbg("do instead_of_query instead")1617opts1 = misc.copy_without(opts, ['cb', 'changes', 'table'])1618client_query.get.instead_of_query @, opts1, (err, result) =>1619locals.result = result1620cb(err)1621return16221623if _query_opts.only_changes1624dbg("skipping query")1625locals.result = undefined1626cb()1627else1628dbg("finally doing query")1629@_user_get_query_do_query _query_opts, client_query, opts.query, opts.multi, json_fields, (err, result) =>1630if err1631cb(err)1632return1633locals.result = result1634cb()1635], (err) =>1636if err1637dbg("series failed -- err=#{err}")1638opts.cb(err)1639return1640dbg("series succeeded")1641opts.cb(undefined, locals.result)1642if opts.changes?1643dbg("sending change queue")1644opts.changes.cb = locals.changes_cb1645##dbg("sending queued #{JSON.stringify(locals.changes_queue)}")1646for {err, obj} in locals.changes_queue1647##dbg("sending queued changes #{JSON.stringify([err, obj])}")1648opts.changes.cb(err, obj)1649)16501651###1652Synchronized strings1653###1654_user_set_query_syncstring_change_after: (old_val, new_val, account_id, cb) =>1655dbg = @_dbg("_user_set_query_syncstring_change_after")1656cb() # return immediately -- stuff below can happen as side effect in the background.1657# Now do the following reactions to this syncstring change in the background:1658# 1. Awaken the relevant project.1659project_id = old_val?.project_id ? new_val?.project_id1660if project_id? and (new_val?.save?.state == 'requested' or (new_val?.last_active? and new_val?.last_active != old_val?.last_active))1661dbg("awakening project #{project_id}")1662awaken_project(@, project_id)166316641665# Verify that writing a patch is allowed.1666_user_set_query_patches_check: (obj, account_id, project_id, cb) =>1667# Reject any patch that is too new1668if obj.time - new Date() > MAX_PATCH_FUTURE_MS1669cb("clock") # this exact error is assumed in synctable!1670return1671# Write access1672@_syncstring_access_check(obj.string_id, account_id, project_id, cb)16731674# Verify that writing a patch is allowed.1675_user_get_query_patches_check: (obj, account_id, project_id, cb) =>1676# Write access (no notion of read only yet -- will be easy to add later)1677@_syncstring_access_check(obj.string_id, account_id, project_id, cb)16781679# Verify that writing a patch is allowed.1680_user_set_query_cursors_check: (obj, account_id, project_id, cb) =>1681@_syncstring_access_check(obj.string_id, account_id, project_id, cb)16821683# Verify that writing a patch is allowed.1684_user_get_query_cursors_check: (obj, account_id, project_id, cb) =>1685@_syncstring_access_check(obj.string_id, account_id, project_id, cb)16861687_syncstring_access_check: (string_id, account_id, project_id, cb) =>1688# Check that string_id is the id of a syncstring the given account_id or1689# project_id is allowed to write to. NOTE: We do not concern ourselves (for now at least)1690# with proof of identity (i.e., one user with full read/write access to a project1691# claiming they are another users of that SAME project), since our security model1692# is that any user of a project can edit anything there. In particular, the1693# synctable lets any user with write access to the project edit the users field.1694if string_id?.length != 401695cb("FATAL: string_id (='#{string_id}') must be a string of length 40")1696return1697@_query1698query : "SELECT project_id FROM syncstrings"1699where : "string_id = $::CHAR(40)" : string_id1700cache : false # *MUST* leave as false (not true), since unfortunately, if this returns no, due to FATAL below this would break opening the file until cache clears.1701cb : one_result 'project_id', (err, x) =>1702if err1703cb(err)1704else if not x1705# There is no such syncstring with this id -- fail1706cb("FATAL: no such syncstring")1707else if account_id?1708# Attempt to read or write by a user browser client1709@_require_project_ids_in_groups(account_id, [x], ['owner', 'collaborator'], cb)1710else if project_id?1711# Attempt to read or write by a *project*1712if project_id == x1713cb()1714else1715cb("FATAL: project not allowed to write to syncstring in different project")171617171718# Check permissions for querying for syncstrings in a project1719_syncstrings_check: (obj, account_id, project_id, cb) =>1720#dbg = @dbg("_syncstrings_check")1721#dbg(misc.to_json([obj, account_id, project_id]))1722if not misc.is_valid_uuid_string(obj?.project_id)1723cb("FATAL: project_id (='#{obj?.project_id}') must be a valid uuid")1724return1725if project_id?1726if project_id == obj.project_id1727# The project can access its own syncstrings1728cb()1729else1730cb("FATAL: projects can only access their own syncstrings") # for now at least!1731return1732if account_id?1733# Access request by a client user1734@_require_project_ids_in_groups(account_id, [obj.project_id], ['owner', 'collaborator'], cb)1735else1736cb("FATAL: only users and projects can access syncstrings")17371738# Other functions that are needed to implement various use queries,1739# e.g., for virtual queries like file_use_times.1740# ASYNC FUNCTION with no callback.1741updateRetentionData: (opts) =>1742return await updateRetentionData(opts)17431744_last_awaken_time = {}1745awaken_project = (db, project_id, cb) ->1746# throttle so that this gets called *for a given project* at most once every 30s.1747now = new Date()1748if _last_awaken_time[project_id]? and now - _last_awaken_time[project_id] < 300001749return1750_last_awaken_time[project_id] = now1751dbg = db._dbg("_awaken_project(project_id=#{project_id})")1752if not db.projectControl?1753dbg("skipping since no projectControl defined")1754return1755dbg("doing it...")1756async.series([1757(cb) ->1758try1759project = db.projectControl(project_id)1760await project.start()1761cb()1762catch err1763cb("error starting project = #{err}")1764(cb) ->1765if not db.ensure_connection_to_project?1766cb()1767return1768dbg("also make sure there is a connection from hub to project")1769# This is so the project can find out that the user wants to save a file (etc.)1770db.ensure_connection_to_project(project_id, cb)1771], (err) ->1772if err1773dbg("awaken project error -- #{err}")1774else1775dbg("success awakening project")1776cb?(err)1777)1778###1779Note about opts.changes.cb:17801781Regarding sync, what was happening I think is:1782- (a) https://github.com/sagemathinc/cocalc/blob/master/src/packages/hub/postgres-user-queries.coffee#L1384 starts sending changes1783- (b) https://github.com/sagemathinc/cocalc/blob/master/src/packages/hub/postgres-user-queries.coffee#L1393 sends the full table.17841785(a) could result in changes actually getting to the client before the table itself has been initialized. The client code assumes that it only gets changes *after* the table is initialized. The browser client seems to be smart enough that it detects this situation and resets itself, so the browser never gets messed up as a result.1786However, the project definitely does NOT do so well, and it can get messed up. Then it has a broken version of the table, missing some last minute change. It is broken until the project forgets about that table entirely, which is can be a pretty long time (or project restart).17871788My fix is to queue up those changes on the server, then only start sending them to the client **after** the (b) query is done. I tested this by using setTimeout to manually delay (b) for a few seconds, and fully seeing the "file won't save problem". The other approach would make it so clients are more robust against getting changes first. However, it would take a long time for all clients to update (restart all projects), and it's an annoying assumption to make in general -- we may have entirely new clients later and they could make the same bad assumptions about order...1789###179017911792