Path: blob/master/src/packages/util/db-schema/llm-utils.ts
// this contains bits and pieces from the wrongly named openai.ts file

import { isEmpty } from "lodash";
import LRU from "lru-cache";

import { unreachable } from "@cocalc/util/misc";

// these can be defined by admins and users
export const SERVICES = [
  "openai",
  "google",
  "mistralai", // the "*ai" suffix is deliberate, because their model names start with "mistral-..." and we have to distinguish it from the prefix
  "anthropic",
  "ollama",
  "custom_openai",
] as const;

// a "user-*" model is a wrapper for all the model services
export const LANGUAGE_MODEL_SERVICES = [...SERVICES, "user"] as const;

export type UserDefinedLLMService = (typeof SERVICES)[number];

export function isUserDefinedModelType(
  model: unknown,
): model is UserDefinedLLMService {
  return SERVICES.includes(model as any);
}

// "User LLMs" are defined in the user's account settings.
// They query an external LLM service of given type, endpoint, and API key.
export interface UserDefinedLLM {
  id: number; // a unique number
  service: UserDefinedLLMService;
  model: string; // non-empty string
  display: string; // short user-visible string
  endpoint: string; // URL to the LLM service
  apiKey: string;
  icon?: string; // https://.../...png
}

export const USER_LLM_PREFIX = "user-";

// This basically prefixes the "model" defined by the user with the USER and service prefix.
// We do not use the to*() functions, because the names of the models could be arbitrary for each service.
export function toUserLLMModelName(llm: UserDefinedLLM) {
  const { service } = llm;
  const model: string = (() => {
    switch (service) {
      case "custom_openai":
        return `${CUSTOM_OPENAI_PREFIX}${llm.model}`;
      case "ollama":
        return toOllamaModel(llm.model);
      case "anthropic":
        return `${ANTHROPIC_PREFIX}${llm.model}`;
      case "google":
        return `${GOOGLE_PREFIX}${llm.model}`;
      case "mistralai":
        return `${MISTRAL_PREFIX}${llm.model}`;
      case "openai":
        return `${OPENAI_PREFIX}${llm.model}`;
      default:
        unreachable(service);
        throw new Error(
          `toUserLLMModelName of service ${service} not supported`,
        );
    }
  })();
  return `${USER_LLM_PREFIX}${model}`;
}

export function fromUserDefinedLLMModel(m: string): string | null {
  if (isUserDefinedModel(m)) {
    return m.slice(USER_LLM_PREFIX.length);
  }
  return null;
}

export function isUserDefinedModel(model: unknown): boolean {
  if (typeof model !== "string") return false;
  if (model.startsWith(USER_LLM_PREFIX)) {
    const m2 = model.slice(USER_LLM_PREFIX.length);
    return SERVICES.some((svc) => m2.startsWith(`${svc}-`));
  }
  return false;
}

export function unpackUserDefinedLLMModel(model: string): {
  service: UserDefinedLLMService;
  model: string;
} | null {
  const um = fromUserDefinedLLMModel(model);
  if (um === null) return null;
  for (const service of SERVICES) {
    if (um.startsWith(`${service}-`)) {
      return { service, model: um.slice(service.length + 1) };
    }
  }
  return null;
}

export const OPENAI_PREFIX = "openai-";
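// Example (illustrative, not part of the original file): a user-defined
// Ollama entry with the hypothetical model name "llama3" round-trips as
//
//   toUserLLMModelName({ service: "ollama", model: "llama3", ... })
//     === "user-ollama-llama3"
//   unpackUserDefinedLLMModel("user-ollama-llama3")
//     === { service: "ollama", model: "llama3" }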
// NOTE: all arrays of model names should be ordered from the "simplest and fastest" to the "most complex, slowest, and most expensive" –
// that way, the ordering in the UI doesn't look arbitrary, but follows a clear logic

export const MODELS_OPENAI = [
  "gpt-3.5-turbo",
  "gpt-4o-mini-8k", // context limited
  "gpt-4o-mini", // Released 2024-07-18
  "gpt-4o-8k", // context limited, similar to gpt-4-turbo-8k
  "gpt-4o", // Released 2024-05-13
  // the "preview" variants are disabled, because the preview is over
  "gpt-4-turbo-preview-8k", // like below, but artificially limited to 8k tokens
  "gpt-4-turbo-preview",
  "gpt-4-turbo-8k", // Released 2024-04-11
  "gpt-4-turbo",
  "gpt-4",
  "gpt-4.1",
  "gpt-4.1-mini",
  "gpt-4-32k",
  "gpt-3.5-turbo-16k",
  "text-embedding-ada-002", // TODO: this is for embeddings, should be moved to a different place
  "o1-mini-8k",
  "o1-mini",
  "o1-8k",
  "o1",
] as const;

export type OpenAIModel = (typeof MODELS_OPENAI)[number];

export function isOpenAIModel(model: unknown): model is OpenAIModel {
  return MODELS_OPENAI.includes(model as any);
}

// ATTN: when you modify this list, also change frontend/.../llm/llm-selector.tsx!
export const MISTRAL_MODELS = [
  // yes, all 3 of them have an extra mistral-prefix, on top of the vendor prefix
  "mistral-small-latest",
  "mistral-medium-latest", // Deprecated!
  "mistral-large-latest",
] as const;

export type MistralModel = (typeof MISTRAL_MODELS)[number];

export function isMistralModel(model: unknown): model is MistralModel {
  return MISTRAL_MODELS.includes(model as any);
}

// google's are taken from here – we use the generative AI client lib
// https://developers.generativeai.google/models/language
// $ curl -s "https://generativelanguage.googleapis.com/v1beta/models?key=$GOOGLE_GENAI" | jq
export const GOOGLE_MODELS = [
  "gemini-1.5-flash-8k", // introduced 2024-05-15
  "gemini-pro", // Discontinued Feb'25. Keep it to avoid breaking old references!
  "gemini-1.0-ultra", // hangs
  "gemini-1.5-pro-8k", // works now with langchain
  "gemini-1.5-pro", // works now with langchain
  "gemini-2.0-flash-8k",
  "gemini-2.0-flash-lite-8k",
] as const;
export type GoogleModel = (typeof GOOGLE_MODELS)[number];
export function isGoogleModel(model: unknown): model is GoogleModel {
  return GOOGLE_MODELS.includes(model as any);
}
export const GOOGLE_MODEL_TO_ID: Partial<{ [m in GoogleModel]: string }> = {
  "gemini-1.5-pro": "gemini-1.5-pro-latest",
  "gemini-1.5-pro-8k": "gemini-1.5-pro-latest",
  "gemini-1.5-flash-8k": "gemini-1.5-flash-latest",
  "gemini-2.0-flash-8k": "gemini-2.0-flash",
  "gemini-2.0-flash-lite-8k": "gemini-2.0-flash-lite",
} as const;
// https://docs.anthropic.com/claude/docs/models-overview -- stable names for the models ...
export const ANTHROPIC_MODELS = [
  "claude-3-5-sonnet",
  "claude-3-5-sonnet-4k", // added 2024-06-24
  "claude-3-haiku",
  "claude-3-haiku-8k", // limited context window, offered for free
  "claude-3-sonnet",
  "claude-3-sonnet-4k", // limited context window, offered for free
  "claude-3-opus-8k", // same issue as the large GPT models, limit the context window to limit spending
  "claude-3-opus",
] as const;
const CLAUDE_SONNET_VERSION = "20240229";
const CLAUDE_HAIKU_VERSION = "20240307";
const CLAUDE_OPUS_VERSION = "20240229";
const CLAUDE_SONNET_3_5_VERSION = "20240620";
// ... and we add a version number (there is no "*-latest") when dispatching on the backend
export const ANTHROPIC_VERSION: { [name in AnthropicModel]: string } = {
  "claude-3-sonnet-4k": CLAUDE_SONNET_VERSION,
  "claude-3-opus": CLAUDE_OPUS_VERSION,
  "claude-3-opus-8k": CLAUDE_OPUS_VERSION,
  "claude-3-sonnet": CLAUDE_SONNET_VERSION,
  "claude-3-5-sonnet": CLAUDE_SONNET_3_5_VERSION,
  "claude-3-5-sonnet-4k": CLAUDE_SONNET_3_5_VERSION,
  "claude-3-haiku": CLAUDE_HAIKU_VERSION,
  "claude-3-haiku-8k": CLAUDE_HAIKU_VERSION,
} as const;
export const ANTHROPIC_PREFIX = "anthropic-";
export type AnthropicModel = (typeof ANTHROPIC_MODELS)[number];
type AnthropicService = `${typeof ANTHROPIC_PREFIX}${AnthropicModel}`;
export function isAnthropicModel(model: unknown): model is AnthropicModel {
  return ANTHROPIC_MODELS.includes(model as any);
}
export function toAnthropicService(model: AnthropicModel): AnthropicService {
  return `${ANTHROPIC_PREFIX}${model}`;
}
export function isAnthropicService(
  service: string,
): service is AnthropicService {
  return service.startsWith(ANTHROPIC_PREFIX);
}
export function fromAnthropicService(
  service: AnthropicService,
): AnthropicModel {
  if (!isAnthropicService(service)) {
    throw new Error(`not an anthropic service: ${service}`);
  }
  return service.slice(ANTHROPIC_PREFIX.length) as AnthropicModel;
}
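// Example (illustrative): the Anthropic helpers compose the frontend name and
// the backend dispatch name like this:
//
//   toAnthropicService("claude-3-haiku") === "anthropic-claude-3-haiku"
//   fromAnthropicService("anthropic-claude-3-haiku") === "claude-3-haiku"
//
// and on the backend, ANTHROPIC_VERSION["claude-3-haiku"] === "20240307"
// would yield the pinned API model name "claude-3-haiku-20240307".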
// the hardcoded list of available language models – there are also dynamic ones, like OllamaLLM objects
export const LANGUAGE_MODELS = [
  ...MODELS_OPENAI,
  ...MISTRAL_MODELS,
  ...GOOGLE_MODELS,
  ...ANTHROPIC_MODELS,
] as const;

export const USER_SELECTABLE_LLMS_BY_VENDOR: {
  [vendor in LLMServiceName]: Readonly<LanguageModelCore[]>;
} = {
  openai: MODELS_OPENAI.filter(
    (m) =>
      m === "gpt-4" ||
      m === "gpt-4-turbo-preview-8k" ||
      m === "gpt-4o-8k" ||
      m === "gpt-4o-mini-8k" ||
      m === "gpt-4.1" ||
      m === "gpt-4.1-mini",

    // ATTN: there is code for o1 and o1-mini, but it does not work yet.
    // The API changed, there is no support for streaming, and it already took
    // too much of my time trying to get it to work.
    // m === "o1-mini-8k" ||
    // m === "o1-8k",
  ),
  google: GOOGLE_MODELS.filter(
    (m) =>
      // we only enable models with a limited context window.
      m === "gemini-1.5-pro-8k" ||
      //m === "gemini-1.5-flash-8k" ||
      m === "gemini-2.0-flash-8k" ||
      m === "gemini-2.0-flash-lite-8k",
  ),
  mistralai: MISTRAL_MODELS.filter((m) => m !== "mistral-medium-latest"),
  anthropic: ANTHROPIC_MODELS.filter((m) => {
    // we show opus and the context restricted models (to avoid high costs)
    return (
      m === "claude-3-opus-8k" ||
      m === "claude-3-5-sonnet-4k" ||
      m === "claude-3-haiku-8k"
    );
  }),
  ollama: [], // this is empty, because these models are not hardcoded
  custom_openai: [], // this is empty, because these models are not hardcoded
  user: [],
} as const;

// This hardcodes which models can be selected by users – refine this by setting site_settings.selectable_llms!
// Make sure to update this when adding new models.
// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx
export const USER_SELECTABLE_LANGUAGE_MODELS = [
  ...USER_SELECTABLE_LLMS_BY_VENDOR.openai,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.google,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.mistralai,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.anthropic,
] as const;

export type OllamaLLM = string;
export type CustomOpenAI = string;

// use the one without Ollama to get stronger typing. Ollama could be any string starting with the OLLAMA_PREFIX.
export type LanguageModelCore = (typeof LANGUAGE_MODELS)[number];
export type LanguageModel = LanguageModelCore | OllamaLLM;
export function isCoreLanguageModel(
  model: unknown,
): model is LanguageModelCore {
  if (typeof model !== "string") return false;
  return LANGUAGE_MODELS.includes(model as any);
}

// we check if the given object is any known language model
export function isLanguageModel(model?: unknown): model is LanguageModel {
  if (model == null) return false;
  if (typeof model !== "string") return false;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (isUserDefinedModel(model)) return true; // this also checks if there is a valid model inside
  return LANGUAGE_MODELS.includes(model as any);
}
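// Example (illustrative): the difference between the two guards is that
// isCoreLanguageModel only accepts hardcoded names, while isLanguageModel
// also accepts dynamic ones:
//
//   isCoreLanguageModel("gpt-4") === true
//   isCoreLanguageModel("ollama-llama3") === false  // "llama3" is hypothetical
//   isLanguageModel("ollama-llama3") === true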
export type LLMServiceName = (typeof LANGUAGE_MODEL_SERVICES)[number];

export function isLLMServiceName(service: unknown): service is LLMServiceName {
  if (typeof service !== "string") return false;
  return LANGUAGE_MODEL_SERVICES.includes(service as any);
}

export type LLMServicesAvailable = Record<LLMServiceName, boolean>;

interface LLMService {
  name: string;
  short: string; // additional short text next to the company name
  desc: string; // more detailed description
  url: string;
}

export const LLM_PROVIDER: { [key in LLMServiceName]: LLMService } = {
  openai: {
    name: "OpenAI",
    short: "AI research and deployment company",
    desc: "OpenAI is an AI research and deployment company. Their mission is to ensure that artificial general intelligence benefits all of humanity.",
    url: "https://openai.com/",
  },
  google: {
    name: "Google",
    short: "Technology company",
    desc: "Google's mission is to organize the world's information and make it universally accessible and useful.",
    url: "https://gemini.google.com/",
  },
  anthropic: {
    name: "Anthropic",
    short: "AI research company",
    desc: "Anthropic is an American artificial intelligence (AI) startup company, founded by former members of OpenAI.",
    url: "https://www.anthropic.com/",
  },
  mistralai: {
    name: "Mistral AI",
    short: "French AI company",
    desc: "Mistral AI is a French company selling artificial intelligence (AI) products.",
    url: "https://mistral.ai/",
  },
  ollama: {
    name: "Ollama",
    short: "Open-source software",
    desc: "Ollama language model server at a custom API endpoint.",
    url: "https://ollama.com/",
  },
  custom_openai: {
    name: "OpenAI API",
    short: "Custom endpoint",
    desc: "Calls a custom OpenAI API endpoint.",
    url: "https://js.langchain.com/v0.1/docs/integrations/llms/openai/",
  },
  user: {
    name: "User Defined",
    short: "Account → Language Model",
    desc: "Defined by the user in Account Settings → Language Model",
    url: "",
  },
} as const;
interface ValidLanguageModelNameProps {
  model: string | undefined;
  filter: LLMServicesAvailable;
  ollama: string[]; // keys of ollama models
  custom_openai: string[]; // keys of custom openai models
  selectable_llms: string[]; // either empty, or an array stored in the server settings
}

// NOTE: these values must be in sync with the "no" vals in db-schema/site-defaults.ts
const DEFAULT_FILTER: Readonly<LLMServicesAvailable> = {
  openai: false,
  google: false,
  ollama: false,
  mistralai: false,
  anthropic: false,
  custom_openai: false,
  user: false,
} as const;

// this is used in initialization functions, e.g. to get a default model depending on the overall availability
// usually, this should just return the chatgpt3 model, but e.g. if neither google nor openai is available,
// then it might even fall back to an available ollama model. It needs to return a string, though, for the frontend, etc.
export function getValidLanguageModelName({
  model,
  filter = DEFAULT_FILTER,
  ollama,
  custom_openai,
  selectable_llms,
}: ValidLanguageModelNameProps): LanguageModel {
  if (typeof model === "string" && isValidModel(model)) {
    try {
      if (isCoreLanguageModel(model)) {
        const v = model2vendor(model).name;
        if (filter[v] && selectable_llms.includes(model)) {
          return model;
        }
      }

      if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) {
        return model;
      }

      if (
        isCustomOpenAI(model) &&
        custom_openai.includes(fromCustomOpenAIModel(model))
      ) {
        return model;
      }

      if (isUserDefinedModel(model)) {
        return model;
      }
    } catch {}
  }

  for (const free of [true, false]) {
    const dflt = getDefaultLLM(
      selectable_llms,
      filter,
      ollama,
      custom_openai,
      free,
    );
    if (dflt != null) {
      return dflt;
    }
  }
  return DEFAULT_MODEL;
}

export const DEFAULT_LLM_PRIORITY: Readonly<UserDefinedLLMService[]> = [
  "google",
  "openai",
  "anthropic",
  "mistralai",
  "ollama",
  "custom_openai",
] as const;

export function getDefaultLLM(
  selectable_llms: string[],
  filter: LLMServicesAvailable,
  ollama?: { [key: string]: any },
  custom_openai?: { [key: string]: any },
  only_free = true,
): LanguageModel {
  for (const v of DEFAULT_LLM_PRIORITY) {
    if (!filter[v]) continue;
    for (const m of USER_SELECTABLE_LLMS_BY_VENDOR[v]) {
      if (selectable_llms.includes(m)) {
        const isFree = LLM_COST[m].free ?? true;
        if ((only_free && isFree) || !only_free) {
          return m;
        }
      }
    }
  }
  // none of the standard models, pick the first ollama or custom_openai
  if (ollama != null && !isEmpty(ollama)) {
    return toOllamaModel(Object.keys(ollama)[0]);
  }
  if (custom_openai != null && !isEmpty(custom_openai)) {
    return toCustomOpenAIModel(Object.keys(custom_openai)[0]);
  }
  return DEFAULT_MODEL;
}
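// Example (illustrative): if only Google is enabled and the requested model
// is not available, the fallback walks DEFAULT_LLM_PRIORITY and returns the
// first enabled, selectable (and preferably free) model:
//
//   getValidLanguageModelName({
//     model: "gpt-4", // openai is disabled in the filter below
//     filter: { ...DEFAULT_FILTER, google: true },
//     ollama: [],
//     custom_openai: [],
//     selectable_llms: ["gemini-2.0-flash-8k"],
//   }) === "gemini-2.0-flash-8k"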
export interface OpenAIMessage {
  role: "system" | "user" | "assistant";
  content: string;
}
export type OpenAIMessages = OpenAIMessage[];

export const OLLAMA_PREFIX = "ollama-";
export type OllamaService = string;
export function isOllamaService(service: string): service is OllamaService {
  return isOllamaLLM(service);
}

export const CUSTOM_OPENAI_PREFIX = "custom_openai-";
export type CustomOpenAIService = string;
export function isCustomOpenAIService(
  service: string,
): service is CustomOpenAIService {
  return isCustomOpenAI(service);
}

export const MISTRAL_PREFIX = "mistralai-";
export type MistralService = `${typeof MISTRAL_PREFIX}${MistralModel}`;
export function isMistralService(service: string): service is MistralService {
  return service.startsWith(MISTRAL_PREFIX);
}

export const GOOGLE_PREFIX = "google-";

// we encode the model in the frontend and elsewhere with the service name as a prefix
// ATTN: don't change the encoding pattern of [vendor]-[model]
// for whatever reason, it's also described that way in purchases/close.ts
export type LanguageServiceCore =
  | `${typeof OPENAI_PREFIX}${OpenAIModel}`
  | `${typeof GOOGLE_PREFIX}${
      | "text-bison-001"
      | "chat-bison-001"
      | "embedding-gecko-001"}`
  | `${typeof GOOGLE_PREFIX}${GoogleModel}`
  | AnthropicService
  | MistralService;

export type LanguageService =
  | LanguageServiceCore
  | OllamaService
  | CustomOpenAIService;

// used e.g. for checking "account-id={string}" and other things like that
export const LANGUAGE_MODEL_PREFIXES = [
  "chatgpt",
  ...LANGUAGE_MODEL_SERVICES.map((v) => `${v}-`),
] as const;

// we encode the model in the frontend and elsewhere with the service name as a prefix
export function model2service(model: LanguageModel): LanguageService {
  if (model === "text-embedding-ada-002") {
    return `${OPENAI_PREFIX}${model}`;
  }
  if (
    isOllamaLLM(model) ||
    isCustomOpenAI(model) ||
    isUserDefinedModel(model)
  ) {
    return model; // already has a useful prefix
  }
  if (isMistralModel(model)) {
    return toMistralService(model);
  }
  if (isAnthropicModel(model)) {
    return toAnthropicService(model);
  }
  if (isLanguageModel(model)) {
    if (
      model === "text-bison-001" ||
      model === "chat-bison-001" ||
      model === "embedding-gecko-001" ||
      isGoogleModel(model)
    ) {
      return `${GOOGLE_PREFIX}${model}`;
    } else {
      return `${OPENAI_PREFIX}${model}`;
    }
  }

  throw new Error(`unknown model: ${model}`);
}
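// Example (illustrative): the [vendor]-[model] encoding in action:
//
//   model2service("mistral-large-latest") === "mistralai-mistral-large-latest"
//   model2service("gpt-4o") === "openai-gpt-4o"
//   model2service("gemini-1.5-pro") === "google-gemini-1.5-pro"
//
// Ollama, custom OpenAI, and user-defined models already carry their prefix
// and pass through unchanged.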
// inverse of model2service, but robust for chat avatars, which might not have a prefix
// TODO: fix the mess
export function service2model(
  service: LanguageService | "chatgpt",
): LanguageModel {
  if (service === "chatgpt") {
    return "gpt-3.5-turbo";
  }
  const lm = service2model_core(service);
  if (lm == null) {
    // We don't throw an error, since the frontend would crash
    // throw new Error(`unknown service: ${service}`);
    console.warn(`service2model: unknown service: ${service}`);
    return "gpt-3.5-turbo";
  }
  return lm;
}

export function service2model_core(
  service: LanguageService,
): LanguageModel | null {
  // split off the first part of service, e.g., "openai-" or "google-"
  const s = service.split("-")[0];
  const hasPrefix = LANGUAGE_MODEL_SERVICES.some((v) => s === v);

  if (isUserDefinedModel(service)) {
    return service;
  }

  const m = hasPrefix ? service.split("-").slice(1).join("-") : service;
  if (hasPrefix) {
    // we add the trailing "-" to match with these prefixes, which include the "-"
    switch (`${s}-`) {
      case OLLAMA_PREFIX:
        return toOllamaModel(m);
      case CUSTOM_OPENAI_PREFIX:
        return toCustomOpenAIModel(m);
    }
  }

  if (LANGUAGE_MODELS.includes(m as any)) {
    return m;
  }
  return null;
}

// NOTE: do not use this – instead use server_settings.default_llm
export const DEFAULT_MODEL: LanguageModel = "gemini-2.0-flash-8k";

interface LLMVendor {
  name: LLMServiceName;
  url: string;
}

export function model2vendor(model): LLMVendor {
  if (isUserDefinedModel(model)) {
    return { name: "user", url: "" };
  } else if (isOllamaLLM(model)) {
    return { name: "ollama", url: LLM_PROVIDER.ollama.url };
  } else if (isCustomOpenAI(model)) {
    return {
      name: "custom_openai",
      url: LLM_PROVIDER.custom_openai.url,
    };
  } else if (isMistralModel(model)) {
    return { name: "mistralai", url: LLM_PROVIDER.mistralai.url };
  } else if (isOpenAIModel(model)) {
    return { name: "openai", url: LLM_PROVIDER.openai.url };
  } else if (isGoogleModel(model)) {
    return { name: "google", url: LLM_PROVIDER.google.url };
  } else if (isAnthropicModel(model)) {
    return { name: "anthropic", url: LLM_PROVIDER.anthropic.url };
  }

  throw new Error(`model2vendor: unknown model: "${model}"`);
}

// wraps the model name with a prefix that indicates that it's an ollama model
// TODO: maybe it will be necessary at some point to pass in the list of available ollama models
// TODO: in the future, this object will also contain info like the max tokens and other parameters (from the DB)
export function toOllamaModel(model: string): OllamaLLM {
  if (isOllamaLLM(model)) {
    throw new Error(`already an ollama model: ${model}`);
  }
  return `${OLLAMA_PREFIX}${model}`;
}

// strips the prefix that indicates that it's an ollama model
export function fromOllamaModel(model: OllamaLLM) {
  if (!isOllamaLLM(model)) {
    throw new Error(`not an ollama model: ${model}`);
  }
  return model.slice(OLLAMA_PREFIX.length);
}

export function isOllamaLLM(model: unknown): model is OllamaLLM {
  return (
    typeof model === "string" &&
    model.startsWith(OLLAMA_PREFIX) &&
    model.length > OLLAMA_PREFIX.length
  );
}

export function toCustomOpenAIModel(model: string): CustomOpenAI {
  if (isCustomOpenAI(model)) {
    throw new Error(`already a custom openai model: ${model}`);
  }
  return `${CUSTOM_OPENAI_PREFIX}${model}`;
}

export function isCustomOpenAI(model: unknown): model is CustomOpenAI {
  return (
    typeof model === "string" &&
    model.startsWith(CUSTOM_OPENAI_PREFIX) &&
    model.length > CUSTOM_OPENAI_PREFIX.length
  );
}

export function fromCustomOpenAIModel(model: CustomOpenAI) {
  if (!isCustomOpenAI(model)) {
    throw new Error(`not a custom openai model: ${model}`);
  }
  return model.slice(CUSTOM_OPENAI_PREFIX.length);
}

export function toMistralService(model: string): MistralService {
  if (isMistralService(model)) {
    throw new Error(`already a mistral model: ${model}`);
  }
  if (!isMistralModel(model)) {
    throw new Error(`not a mistral model: ${model}`);
  }
  return `${MISTRAL_PREFIX}${model}`;
}

export function fromMistralService(model: MistralService) {
  if (!isMistralService(model)) {
    throw new Error(`not a mistral model: ${model}`);
  }
  return model.slice(MISTRAL_PREFIX.length);
}
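// Example (illustrative, with the hypothetical model name "llama3"):
//
//   toOllamaModel("llama3") === "ollama-llama3"
//   fromOllamaModel("ollama-llama3") === "llama3"
//   service2model_core("ollama-llama3") === "ollama-llama3"  // keeps prefix
//   service2model_core("openai-gpt-4o") === "gpt-4o"         // strips prefix
//
// Note that toOllamaModel throws if the prefix is already present, which
// guards against double-wrapping.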
type LLM2String = {
  [key in
    | (typeof USER_SELECTABLE_LANGUAGE_MODELS)[number]
    | "chatgpt" // some additional ones, backwards compatibility
    | "chatgpt3"
    | "chatgpt4"
    | "gpt-4-32k"
    | "text-bison-001"
    | "chat-bison-001"]: string;
};

// Map from pseudo account_id to what should be displayed to the user.
// This is used in various places in the frontend.
// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
export const LLM_USERNAMES: LLM2String = {
  chatgpt: "GPT-3.5",
  chatgpt3: "GPT-3.5",
  chatgpt4: "GPT-4",
  "gpt-4": "GPT-4",
  "gpt-4-32k": "GPT-4-32k",
  "gpt-3.5-turbo": "GPT-3.5",
  "gpt-3.5-turbo-16k": "GPT-3.5-16k",
  "gpt-4-turbo-preview": "GPT-4 Turbo 128k",
  "gpt-4-turbo-preview-8k": "GPT-4 Turbo",
  "gpt-4-turbo": "GPT-4 Turbo 128k",
  "gpt-4-turbo-8k": "GPT-4 Turbo",
  "gpt-4o": "GPT-4o 128k",
  "gpt-4o-8k": "GPT-4o",
  "gpt-4o-mini": "GPT-4o Mini 128k",
  "gpt-4o-mini-8k": "GPT-4o Mini",
  "gpt-4.1": "GPT-4.1",
  "gpt-4.1-mini": "GPT-4.1 Mini",
  "o1-mini-8k": "OpenAI o1-mini",
  "o1-8k": "OpenAI o1",
  "o1-mini": "OpenAI o1-mini",
  o1: "OpenAI o1",
  "text-embedding-ada-002": "Text Embedding Ada 002", // TODO: this is for embeddings, should be moved to a different place
  "text-bison-001": "PaLM 2",
  "chat-bison-001": "PaLM 2",
  "gemini-pro": "Gemini 1.0 Pro",
  "gemini-1.0-ultra": "Gemini 1.0 Ultra",
  "gemini-1.5-pro": "Gemini 1.5 Pro 1m",
  "gemini-1.5-pro-8k": "Gemini 1.5 Pro",
  "gemini-1.5-flash-8k": "Gemini 1.5 Flash",
  "gemini-2.0-flash-8k": "Gemini 2.0 Flash",
  "gemini-2.0-flash-lite-8k": "Gemini 2.0 Flash Lite",
  "mistral-small-latest": "Mistral AI Small",
  "mistral-medium-latest": "Mistral AI Medium",
  "mistral-large-latest": "Mistral AI Large",
  "claude-3-haiku": "Claude 3 Haiku 200k",
  "claude-3-haiku-8k": "Claude 3 Haiku",
  "claude-3-sonnet": "Claude 3 Sonnet 200k",
  "claude-3-sonnet-4k": "Claude 3 Sonnet",
  "claude-3-5-sonnet": "Claude 3.5 Sonnet 200k",
  "claude-3-5-sonnet-4k": "Claude 3.5 Sonnet",
  "claude-3-opus": "Claude 3 Opus 200k",
  "claude-3-opus-8k": "Claude 3 Opus",
} as const;
// similar to the above, we map to short user-visible description texts
// this comes next to the name, hence you do not have to mention the name
export const LLM_DESCR: LLM2String = {
  chatgpt: "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  chatgpt3: "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  chatgpt4:
    "Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4":
    "Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4.1":
    "Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4-32k": "",
  "gpt-3.5-turbo": "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  "gpt-3.5-turbo-16k": `Same as ${LLM_USERNAMES["gpt-3.5-turbo"]} but with larger 16k token context`,
  "gpt-4-turbo-preview-8k":
    "More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
  "gpt-4-turbo-preview": "Like GPT-4 Turbo, but with up to 128k token context",
  "gpt-4-turbo-8k":
    "Faster, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
  "gpt-4-turbo": "Like GPT-4 Turbo, but with up to 128k token context",
  "gpt-4o-8k":
    "Most powerful, fastest, and cheapest (OpenAI, 8k token context)",
  "gpt-4o": "Most powerful, fastest, and cheapest (OpenAI, 128k token context)",
  "gpt-4o-mini-8k":
    "Most cost-efficient small model (OpenAI, 8k token context)",
  "gpt-4.1-mini": "Most cost-efficient small model (OpenAI, 8k token context)",
  "gpt-4o-mini": "Most cost-efficient small model (OpenAI, 128k token context)",
  "text-embedding-ada-002": "Text embedding Ada 002 by OpenAI", // TODO: this is for embeddings, should be moved to a different place
  "o1-8k": "Spends more time thinking (8k token context)",
  "o1-mini-8k": "A cost-efficient reasoning model (8k token context)",
  o1: "Spends more time thinking (8k token context)",
  "o1-mini": "A cost-efficient reasoning model (8k token context)",
  "text-bison-001": "",
  "chat-bison-001": "",
  "gemini-pro":
    "Google's Gemini 1.0 Pro Generative AI model (30k token context)",
  "gemini-1.0-ultra":
    "Google's Gemini 1.0 Ultra Generative AI model (30k token context)",
  "gemini-1.5-pro":
    "Google's Gemini 1.5 Pro Generative AI model (1m token context)",
  "gemini-1.5-pro-8k":
    "Google's Gemini 1.5 Pro Generative AI model (8k token context)",
  "gemini-1.5-flash-8k":
    "Google's Gemini 1.5 Flash Generative AI model (8k token context)",
  "gemini-2.0-flash-8k":
    "Google's Gemini 2.0 Flash Generative AI model (8k token context)",
  "gemini-2.0-flash-lite-8k":
    "Google's Gemini 2.0 Flash Lite Generative AI model (8k token context)",
  "mistral-small-latest":
    "Fast, simple queries, short answers, less capabilities. (Mistral AI, 4k token context)",
  "mistral-medium-latest":
    "Intermediate tasks, summarizing, generating documents, etc. (Mistral AI, 4k token context)",
  "mistral-large-latest":
    "Most powerful, large reasoning capabilities, but slower. (Mistral AI, 4k token context)",
  "claude-3-haiku":
    "Fastest model, lightweight actions (Anthropic, 200k token context)",
  "claude-3-haiku-8k":
    "Fastest model, lightweight actions (Anthropic, 8k token context)",
  "claude-3-5-sonnet":
    "Our most intelligent model (Anthropic, 200k token context)",
  "claude-3-5-sonnet-4k":
    "Our most intelligent model (Anthropic, 4k token context)",
  "claude-3-sonnet":
    "Best combination of performance and speed (Anthropic, 200k token context)",
  "claude-3-sonnet-4k":
    "Best combination of performance and speed (Anthropic, 4k token context)",
  "claude-3-opus":
    "Excels at writing and complex tasks (Anthropic, 200k token context)",
  "claude-3-opus-8k":
    "Excels at writing and complex tasks (Anthropic, 8k token context)",
} as const;

export function isFreeModel(model: unknown, isCoCalcCom: boolean): boolean {
  if (!isCoCalcCom) return true;
  if (isUserDefinedModel(model)) return true;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (typeof model === "string" && LANGUAGE_MODELS.includes(model as any)) {
    // i.e. model is now of type LanguageModelCore, so we look up its cost entry
    const costInfo = LLM_COST[model];
    if (costInfo != null) {
      return costInfo.free;
    }
  }
  // all others are free (this should actually never happen, but we're cautious)
  return true;
}
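// Example (illustrative): on cocalc.com, the "free" flag of the cost table
// decides; everywhere else every model is free:
//
//   isFreeModel("gpt-4o-mini-8k", true) === true   // LLM_COST entry is free
//   isFreeModel("gpt-4", true) === false
//   isFreeModel("gpt-4", false) === true           // not cocalc.com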
// this is used in purchases/get-service-cost
// we only need to check for the vendor prefixes, no special cases!
export function isLanguageModelService(
  service: string,
): service is LanguageService {
  if (isUserDefinedModel(service)) return true;
  for (const v of LANGUAGE_MODEL_SERVICES) {
    if (service.startsWith(`${v}-`)) {
      return true;
    }
  }
  return false;
}

export function getLLMServiceStatusCheckMD(service: LLMServiceName): string {
  switch (service) {
    case "openai":
      return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`;
    case "google":
      return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`;
    case "ollama":
      return `No status information for Ollama available.`;
    case "custom_openai":
      return `No status information for Custom OpenAI available.`;
    case "mistralai":
      return `No status information for Mistral AI available.`;
    case "anthropic":
      return `Anthropic [status](https://status.anthropic.com/).`;
    case "user":
      return `No status information for user defined models available.`;
    default:
      unreachable(service);
  }
  return "";
}

interface Cost {
  prompt_tokens: number;
  completion_tokens: number;
  max_tokens: number;
  free: boolean; // whether this model has metered paid usage, or is offered for free
}

// price per token for a given price of USD per 1M tokens
function usd1Mtokens(usd: number): number {
  return usd / 1_000_000;
}
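// Example (illustrative): usd1Mtokens(30) === 0.00003, i.e. $30 per 1M tokens
// is $0.00003 per token, so a 1000 token prompt at that rate costs
// 1000 * 0.00003 = $0.03.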
// This is the official published cost that openai charges.
// It changes over time, so this will sometimes need to be updated.
// Our cost is a configurable multiple of this.
// https://openai.com/pricing#language-models
// There appears to be no api that provides the prices, unfortunately.
export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
  "gpt-4": {
    prompt_tokens: usd1Mtokens(30),
    completion_tokens: usd1Mtokens(60),
    max_tokens: 8192,
    free: false,
  },
  "gpt-4-32k": {
    prompt_tokens: usd1Mtokens(60),
    completion_tokens: usd1Mtokens(120),
    max_tokens: 32768,
    free: false,
  },
  "gpt-3.5-turbo": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 4096,
    free: true,
  },
  "gpt-3.5-turbo-16k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 16384,
    free: false,
  },
  // like above, but we limit the tokens to reduce how much money the user has to commit to
  "gpt-4-turbo-preview-8k": {
    prompt_tokens: usd1Mtokens(10),
    completion_tokens: usd1Mtokens(30),
    max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
    free: false,
  },
  "gpt-4-turbo-preview": {
    prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens
    completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  }, // like above, but we limit the tokens to reduce how much money the user has to commit to
  "gpt-4-turbo-8k": {
    prompt_tokens: usd1Mtokens(10),
    completion_tokens: usd1Mtokens(30),
    max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
    free: false,
  },
  "gpt-4-turbo": {
    prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens
    completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  },
  "gpt-4.1": {
    prompt_tokens: usd1Mtokens(2),
    completion_tokens: usd1Mtokens(8),
    max_tokens: 8192,
    free: false,
  },
  "gpt-4.1-mini": {
    prompt_tokens: usd1Mtokens(0.4),
    completion_tokens: usd1Mtokens(1.6),
    max_tokens: 8192,
    free: true,
  },
  "gpt-4o-8k": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "gpt-4o": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  },
  "gpt-4o-mini-8k": {
    prompt_tokens: usd1Mtokens(0.15),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: true,
  },
  "gpt-4o-mini": {
    prompt_tokens: usd1Mtokens(0.15),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: true,
  },
  o1: {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(7.5),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "o1-mini": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(1.5),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "o1-8k": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(7.5),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "o1-mini-8k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(1.5),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  // also OpenAI
  "text-embedding-ada-002": {
    prompt_tokens: 0.0001 / 1000,
    completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings
    max_tokens: 8191,
    free: false,
  },
  // https://ai.google.dev/pricing
  "gemini-pro": {
    prompt_tokens: usd1Mtokens(0.5),
    completion_tokens: usd1Mtokens(1.5),
    max_tokens: 30720,
    free: true,
  },
  "gemini-1.5-pro-8k": {
    prompt_tokens: usd1Mtokens(1.25), // (we're below the 128k context)
    completion_tokens: usd1Mtokens(5),
    max_tokens: 8_000,
    free: false,
  },
  "gemini-1.5-pro": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 1048576,
    free: false,
  },
  "gemini-1.0-ultra": {
    prompt_tokens: usd1Mtokens(1), // TODO: price not yet known!
    completion_tokens: usd1Mtokens(1),
    max_tokens: 30720,
    free: true,
  },
  "gemini-1.5-flash-8k": {
    prompt_tokens: usd1Mtokens(0.075),
    completion_tokens: usd1Mtokens(0.3),
    max_tokens: 8_000,
    free: true,
  },
  // https://ai.google.dev/gemini-api/docs/pricing?hl=de
  "gemini-2.0-flash-8k": {
    prompt_tokens: usd1Mtokens(0.1),
    completion_tokens: usd1Mtokens(0.4),
    max_tokens: 8_000,
    free: true,
  },
  "gemini-2.0-flash-lite-8k": {
    prompt_tokens: usd1Mtokens(0.075),
    completion_tokens: usd1Mtokens(0.3),
    max_tokens: 8_000,
    free: true,
  },
  // https://mistral.ai/technology/
  "mistral-small-latest": {
    prompt_tokens: usd1Mtokens(0.2),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  "mistral-medium-latest": {
    prompt_tokens: usd1Mtokens(2.7),
    completion_tokens: usd1Mtokens(8.1),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  "mistral-large-latest": {
    prompt_tokens: usd1Mtokens(2),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: false,
  },
  // Anthropic: pricing is listed on this page: https://www.anthropic.com/api
  "claude-3-opus-8k": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 8_000, // limited to 8k tokens, to reduce the necessary spend limit to commit to
    free: false,
  },
  "claude-3-opus": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-5-sonnet": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-5-sonnet-4k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 4_000, // limited to 4k tokens
    free: false,
  },
  "claude-3-sonnet-4k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 4_000, // limited to 4k tokens, offered for free
    free: true,
  },
  "claude-3-sonnet": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-haiku-8k": {
    prompt_tokens: usd1Mtokens(0.8),
    completion_tokens: usd1Mtokens(4),
    max_tokens: 8_000, // limited to 8k tokens, offered for free
    free: true,
  },
  "claude-3-haiku": {
    prompt_tokens: usd1Mtokens(0.8),
    completion_tokens: usd1Mtokens(4),
    max_tokens: 200_000,
    free: false,
  },
} as const;

// TODO: remove this test – it's only used server side, and that server side check should work for all known LLM models
export function isValidModel(model?: string): boolean {
  if (model == null) return false;
  if (isUserDefinedModel(model)) return true;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (isMistralModel(model)) return true;
  if (isGoogleModel(model)) return true;
  return LLM_COST[model ?? ""] != null;
}

export function getMaxTokens(model?: LanguageModel): number {
  // TODO: store max tokens in the model object itself, this is just a fallback
  if (isOllamaLLM(model)) return 8192;
  if (isMistralModel(model)) return 4096; // TODO: check with MistralAI
  return LLM_COST[model ?? ""]?.max_tokens ?? 4096;
}
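// Example (illustrative): getMaxTokens falls back by model kind:
//
//   getMaxTokens("claude-3-opus") === 200000      // from LLM_COST
//   getMaxTokens("ollama-llama3") === 8192        // "llama3" is hypothetical
//   getMaxTokens(undefined) === 4096              // final fallback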
export interface LLMCost {
  prompt_tokens: number;
  completion_tokens: number;
}

export function getLLMCost(
  model: LanguageModelCore,
  markup_percentage: number, // a number like "30" means that we increase the wholesale price by multiplying by 1.3
): LLMCost {
  const x = LLM_COST[model];
  if (x == null) {
    throw Error(`unknown model "${model}"`);
  }
  const { prompt_tokens, completion_tokens } = x;
  if (markup_percentage < 0) {
    throw Error("markup percentage can't be negative");
  }
  const f = 1 + markup_percentage / 100;
  return {
    prompt_tokens: prompt_tokens * f,
    completion_tokens: completion_tokens * f,
  };
}

const priceRangeCache = new LRU<string, ReturnType<typeof getLLMPriceRange>>({
  max: 10,
});

export function getLLMPriceRange(
  prompt: number,
  output: number,
  markup_percentage: number,
): { min: number; max: number } {
  const cacheKey = `${prompt}::${output}::${markup_percentage}`;
  const cached = priceRangeCache.get(cacheKey);
  if (cached) return cached;

  let min = Infinity;
  let max = 0;
  for (const key in LLM_COST) {
    const model = LLM_COST[key];
    if (!model || isFreeModel(key, true)) continue;
    const { prompt_tokens, completion_tokens } = getLLMCost(
      key as LanguageModelCore,
      markup_percentage,
    );
    const p = prompt * prompt_tokens + output * completion_tokens;

    min = Math.min(min, p);
    max = Math.max(max, p);
  }
  const ret = { min, max };
  priceRangeCache.set(cacheKey, ret);
  return ret;
}

// The maximum cost of a single call using the given model.
// We can't know the cost until after it happens, so this bound is useful for
// ensuring the user can afford to make a call.
export function getMaxCost(
  model: LanguageModelCore,
  markup_percentage: number,
): number {
  const { prompt_tokens, completion_tokens } = getLLMCost(
    model,
    markup_percentage,
  );
  const { max_tokens } = LLM_COST[model];
  return Math.max(prompt_tokens, completion_tokens) * max_tokens;
}
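// Example (illustrative): with a 30% markup, "gpt-4o" costs
// getLLMCost("gpt-4o", 30).prompt_tokens === usd1Mtokens(2.5) * 1.3, i.e.
// $3.25 per 1M prompt tokens; and the worst-case bound for one "gpt-4" call is
// getMaxCost("gpt-4", 0) === (60 / 1_000_000) * 8192 ≈ $0.49.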
/**
 * Initially, we just had one system prompt for all LLMs.
 * This was tuned for the ChatGPTs by OpenAI, but breaks down for others.
 * For example, Gemini and Mistral are confused by mentioning "CoCalc" and insert code cells for all kinds of questions.
 */
export function getSystemPrompt(
  model: LanguageModel,
  _path: string | undefined,
) {
  // TODO: for now, path is ignored. We might want to use it to customize the prompt in the future.
  const common = "Be brief.";
  const math = "Enclose any math formulas in $.";

  if (
    model2vendor(model).name === "openai" ||
    model.startsWith(OPENAI_PREFIX)
  ) {
    const mdCode =
      "Include the language directly after the triple backticks in all markdown code blocks.";
    return `Assume full access to CoCalc and using CoCalc right now.\n${mdCode}\n${math}\n${common}`;
  }

  // mistral stupidly inserts anything mentioned in the prompt as examples, always.
  if (
    model2vendor(model).name === "mistralai" ||
    model.startsWith(MISTRAL_PREFIX)
  ) {
    return common;
  }

  if (
    model2vendor(model).name === "google" ||
    model.startsWith(GOOGLE_PREFIX)
  ) {
    return `${math}\n${common}`;
  }

  if (
    model2vendor(model).name === "ollama" ||
    model.startsWith(OLLAMA_PREFIX)
  ) {
    return `${common}`;
  }

  if (
    model2vendor(model).name === "anthropic" ||
    model.startsWith(ANTHROPIC_PREFIX)
  ) {
    return `${math}\n${common}`;
  }

  const mdCode = `Any code blocks in triple backticks should mention the language after the first backticks. For example \`\`\`python\nprint("Hello, World!")\n\`\`\``;
  return `${mdCode}\n${math}\n${common}`;
}
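// Example (illustrative): the vendor decides which fragments are combined:
//
//   getSystemPrompt("mistral-large-latest", undefined) === "Be brief."
//   getSystemPrompt("claude-3-haiku", undefined)
//     === "Enclose any math formulas in $.\nBe brief."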