// this contains bits and pieces from the wrongly named openai.ts file

import { isEmpty } from "lodash";
import LRU from "lru-cache";

import { unreachable } from "@cocalc/util/misc";

// these can be defined by admins and users
export const SERVICES = [
  "openai",
  "google",
  "mistralai", // the "*ai" suffix is deliberate, because their model names start with "mistral-..." and we have to distinguish the model name from the service prefix
  "anthropic",
  "ollama",
  "custom_openai",
] as const;

// a "user-*" model is a wrapper for all the model services
export const LANGUAGE_MODEL_SERVICES = [...SERVICES, "user"] as const;

export type UserDefinedLLMService = (typeof SERVICES)[number];

export function isUserDefinedModelType(
  model: unknown,
): model is UserDefinedLLMService {
  return SERVICES.includes(model as any);
}

// "User LLMs" are defined in the user's account settings.
// They query an external LLM service of given type, endpoint, and API key.
export interface UserDefinedLLM {
  id: number; // a unique number
  service: UserDefinedLLMService;
  model: string; // non-empty string
  display: string; // short user-visible string
  endpoint: string; // URL to the LLM service
  apiKey: string;
  icon?: string; // https://.../...png
}

export const USER_LLM_PREFIX = "user-";

// This basically prefixes the "model" defined by the user with the USER and service prefix.
// We do not use the to*() functions, because the model names could be arbitrary for each service.
export function toUserLLMModelName(llm: UserDefinedLLM) {
  const { service } = llm;
  const model: string = (() => {
    switch (service) {
      case "custom_openai":
        return `${CUSTOM_OPENAI_PREFIX}${llm.model}`;
      case "ollama":
        return toOllamaModel(llm.model);
      case "anthropic":
        return `${ANTHROPIC_PREFIX}${llm.model}`;
      case "google":
        return `${GOOGLE_PREFIX}${llm.model}`;
      case "mistralai":
        return `${MISTRAL_PREFIX}${llm.model}`;
      case "openai":
        return `${OPENAI_PREFIX}${llm.model}`;
      default:
        unreachable(service);
        throw new Error(
          `toUserLLMModelName of service ${service} not supported`,
        );
    }
  })();
  return `${USER_LLM_PREFIX}${model}`;
}

export function fromUserDefinedLLMModel(m: string): string | null {
  if (isUserDefinedModel(m)) {
    return m.slice(USER_LLM_PREFIX.length);
  }
  return null;
}

export function isUserDefinedModel(model: unknown): boolean {
  if (typeof model !== "string") return false;
  if (model.startsWith(USER_LLM_PREFIX)) {
    const m2 = model.slice(USER_LLM_PREFIX.length);
    return SERVICES.some((svc) => m2.startsWith(`${svc}-`));
  }
  return false;
}

export function unpackUserDefinedLLMModel(model: string): {
  service: UserDefinedLLMService;
  model: string;
} | null {
  const um = fromUserDefinedLLMModel(model);
  if (um === null) return null;
  for (const service of SERVICES) {
    if (um.startsWith(`${service}-`)) {
      return { service, model: um.slice(service.length + 1) };
    }
  }
  return null;
}
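
// Illustrative sketch (not part of the module API, all values made up): a
// user-defined Ollama model "llama3" round-trips through these helpers as
// follows:
//
//   const llm: UserDefinedLLM = {
//     id: 1,
//     service: "ollama",
//     model: "llama3",
//     display: "Llama 3",
//     endpoint: "http://localhost:11434",
//     apiKey: "",
//   };
//   toUserLLMModelName(llm); // "user-ollama-llama3"
//   isUserDefinedModel("user-ollama-llama3"); // true
//   unpackUserDefinedLLMModel("user-ollama-llama3"); // { service: "ollama", model: "llama3" }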

export const OPENAI_PREFIX = "openai-";

// NOTE: all arrays of model names should be ordered from the simplest and fastest to the most complex, slowest, and most expensive
// that way, the ordering in the UI doesn't look arbitrary, but has a clear logic

export const MODELS_OPENAI = [
  "gpt-3.5-turbo",
  "gpt-4o-mini-8k", // context limited
  "gpt-4o-mini", // Released 2024-07-18
  "gpt-4o-8k", // context limited, similar to gpt-4-turbo-8k
  "gpt-4o", // Released 2024-05-13
  // the "preview" variants are disabled, because the preview is over
  "gpt-4-turbo-preview-8k", // like below, but artificially limited to 8k tokens
  "gpt-4-turbo-preview",
  "gpt-4-turbo-8k", // Released 2024-04-11
  "gpt-4-turbo",
  "gpt-4",
  "gpt-4.1",
  "gpt-4.1-mini",
  "gpt-4-32k",
  "gpt-3.5-turbo-16k",
  "text-embedding-ada-002", // TODO: this is for embeddings, should be moved to a different place
  "o1-mini-8k",
  "o1-mini",
  "o1-8k",
  "o1",
] as const;

export type OpenAIModel = (typeof MODELS_OPENAI)[number];

export function isOpenAIModel(model: unknown): model is OpenAIModel {
  return MODELS_OPENAI.includes(model as any);
}

// ATTN: when you modify this list, also change frontend/.../llm/llm-selector.tsx!
export const MISTRAL_MODELS = [
  // yes, all 3 of them have an extra mistral-prefix, on top of the vendor prefix
  "mistral-small-latest",
  "mistral-medium-latest", // Deprecated!
  "mistral-large-latest",
] as const;

export type MistralModel = (typeof MISTRAL_MODELS)[number];

export function isMistralModel(model: unknown): model is MistralModel {
  return MISTRAL_MODELS.includes(model as any);
}

// google's are taken from here – we use the generative AI client lib
// https://developers.generativeai.google/models/language
// $ curl -s "https://generativelanguage.googleapis.com/v1beta/models?key=$GOOGLE_GENAI" | jq
export const GOOGLE_MODELS = [
  "gemini-1.5-flash-8k", // introduced 2024-05-15
  "gemini-pro", // Discontinued Feb'25. Keep it to avoid breaking old references!
  "gemini-1.0-ultra", // hangs
  "gemini-1.5-pro-8k", // works now with langchain
  "gemini-1.5-pro", // works now with langchain
  "gemini-2.0-flash-8k",
  "gemini-2.0-flash-lite-8k",
] as const;
export type GoogleModel = (typeof GOOGLE_MODELS)[number];
export function isGoogleModel(model: unknown): model is GoogleModel {
  return GOOGLE_MODELS.includes(model as any);
}
export const GOOGLE_MODEL_TO_ID: Partial<{ [m in GoogleModel]: string }> = {
  "gemini-1.5-pro": "gemini-1.5-pro-latest",
  "gemini-1.5-pro-8k": "gemini-1.5-pro-latest",
  "gemini-1.5-flash-8k": "gemini-1.5-flash-latest",
  "gemini-2.0-flash-8k": "gemini-2.0-flash",
  "gemini-2.0-flash-lite-8k": "gemini-2.0-flash-lite",
} as const;
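
// Illustrative sketch (an assumption about how the backend uses this table):
// the "-8k" names are CoCalc-side variants with a reduced context window, so a
// call to the Gemini API presumably resolves them to a real model id, e.g.:
//
//   const model: GoogleModel = "gemini-2.0-flash-8k";
//   const apiId = GOOGLE_MODEL_TO_ID[model] ?? model; // "gemini-2.0-flash"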

// https://docs.anthropic.com/claude/docs/models-overview -- stable names for the models ...
export const ANTHROPIC_MODELS = [
  "claude-3-5-sonnet",
  "claude-3-5-sonnet-4k", // added 2024-06-24
  "claude-3-haiku",
  "claude-3-haiku-8k", // limited context window, offered for free
  "claude-3-sonnet",
  "claude-3-sonnet-4k", // limited context window, offered for free
  "claude-3-opus-8k", // same issue as the large GPT models, limit the context window to limit spending
  "claude-3-opus",
] as const;
const CLAUDE_SONNET_VERSION = "20240229";
const CLAUDE_HAIKU_VERSION = "20240307";
const CLAUDE_OPUS_VERSION = "20240229";
const CLAUDE_SONNET_3_5_VERSION = "20240620";
// ... and we add a version number (there is no "*-latest") when dispatching on the backend
export const ANTHROPIC_VERSION: { [name in AnthropicModel]: string } = {
  "claude-3-sonnet-4k": CLAUDE_SONNET_VERSION,
  "claude-3-opus": CLAUDE_OPUS_VERSION,
  "claude-3-opus-8k": CLAUDE_OPUS_VERSION,
  "claude-3-sonnet": CLAUDE_SONNET_VERSION,
  "claude-3-5-sonnet": CLAUDE_SONNET_3_5_VERSION,
  "claude-3-5-sonnet-4k": CLAUDE_SONNET_3_5_VERSION,
  "claude-3-haiku": CLAUDE_HAIKU_VERSION,
  "claude-3-haiku-8k": CLAUDE_HAIKU_VERSION,
} as const;
export const ANTHROPIC_PREFIX = "anthropic-";
export type AnthropicModel = (typeof ANTHROPIC_MODELS)[number];
type AnthropicService = `${typeof ANTHROPIC_PREFIX}${AnthropicModel}`;
export function isAnthropicModel(model: unknown): model is AnthropicModel {
  return ANTHROPIC_MODELS.includes(model as any);
}
export function toAnthropicService(model: AnthropicModel): AnthropicService {
  return `${ANTHROPIC_PREFIX}${model}`;
}
export function isAnthropicService(
  service: string,
): service is AnthropicService {
  return service.startsWith(ANTHROPIC_PREFIX);
}
export function fromAnthropicService(
  service: AnthropicService,
): AnthropicModel {
  if (!isAnthropicService(service)) {
    throw new Error(`not an anthropic service: ${service}`);
  }
  return service.slice(ANTHROPIC_PREFIX.length) as AnthropicModel;
}
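
// Illustrative sketch (an assumption about the backend dispatch): the stable
// model name is what users select, while the Anthropic API expects a versioned
// name, so dispatch presumably combines the two roughly like this:
//
//   const model: AnthropicModel = "claude-3-haiku-8k";
//   toAnthropicService(model); // "anthropic-claude-3-haiku-8k"
//   // strip the CoCalc-only "-4k"/"-8k" suffix and append the pinned version:
//   const apiModel = `${model.replace(/-[48]k$/, "")}-${ANTHROPIC_VERSION[model]}`;
//   // "claude-3-haiku-20240307"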

// the hardcoded list of available language models – there are also dynamic ones, like OllamaLLM objects
export const LANGUAGE_MODELS = [
  ...MODELS_OPENAI,
  ...MISTRAL_MODELS,
  ...GOOGLE_MODELS,
  ...ANTHROPIC_MODELS,
] as const;

export const USER_SELECTABLE_LLMS_BY_VENDOR: {
  [vendor in LLMServiceName]: Readonly<LanguageModelCore[]>;
} = {
  openai: MODELS_OPENAI.filter(
    (m) =>
      m === "gpt-4" ||
      m === "gpt-4-turbo-preview-8k" ||
      m === "gpt-4o-8k" ||
      m === "gpt-4o-mini-8k" ||
      m === "gpt-4.1" ||
      m === "gpt-4.1-mini",

    // ATTN: there is code for o1 and o1-mini, but it does not work yet.
    // The API changed, there is no support for streaming, and it took
    // too much of my time trying to get it to work already.
    // m === "o1-mini-8k" ||
    // m === "o1-8k",
  ),
  google: GOOGLE_MODELS.filter(
    (m) =>
      // we only enable 1.5 pro and 1.5 flash with a limited context window.
      m === "gemini-1.5-pro-8k" ||
      //m === "gemini-1.5-flash-8k" ||
      m === "gemini-2.0-flash-8k" ||
      m === "gemini-2.0-flash-lite-8k",
  ),
  mistralai: MISTRAL_MODELS.filter((m) => m !== "mistral-medium-latest"),
  anthropic: ANTHROPIC_MODELS.filter((m) => {
    // we show opus and the context restricted models (to avoid high costs)
    return (
      m === "claude-3-opus-8k" ||
      m === "claude-3-5-sonnet-4k" ||
      m === "claude-3-haiku-8k"
    );
  }),
  ollama: [], // this is empty, because these models are not hardcoded
  custom_openai: [], // this is empty, because these models are not hardcoded
  user: [],
} as const;

// This hardcodes which models can be selected by users – refine this by setting site_settings.selectable_llms!
// Make sure to update this when adding new models.
// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx
export const USER_SELECTABLE_LANGUAGE_MODELS = [
  ...USER_SELECTABLE_LLMS_BY_VENDOR.openai,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.google,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.mistralai,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.anthropic,
] as const;

export type OllamaLLM = string;
export type CustomOpenAI = string;

// use the one without Ollama to get stronger typing. Ollama could be any string starting with the OLLAMA_PREFIX.
export type LanguageModelCore = (typeof LANGUAGE_MODELS)[number];
export type LanguageModel = LanguageModelCore | OllamaLLM;
export function isCoreLanguageModel(
  model: unknown,
): model is LanguageModelCore {
  if (typeof model !== "string") return false;
  return LANGUAGE_MODELS.includes(model as any);
}

// we check if the given object is any known language model
export function isLanguageModel(model?: unknown): model is LanguageModel {
  if (model == null) return false;
  if (typeof model !== "string") return false;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (isUserDefinedModel(model)) return true; // this also checks if there is a valid model inside
  return LANGUAGE_MODELS.includes(model as any);
}

export type LLMServiceName = (typeof LANGUAGE_MODEL_SERVICES)[number];

export function isLLMServiceName(service: unknown): service is LLMServiceName {
  if (typeof service !== "string") return false;
  return LANGUAGE_MODEL_SERVICES.includes(service as any);
}

export type LLMServicesAvailable = Record<LLMServiceName, boolean>;

interface LLMService {
  name: string;
  short: string; // additional short text next to the company name
  desc: string; // more detailed description
  url: string;
}

export const LLM_PROVIDER: { [key in LLMServiceName]: LLMService } = {
  openai: {
    name: "OpenAI",
    short: "AI research and deployment company",
    desc: "OpenAI is an AI research and deployment company. Their mission is to ensure that artificial general intelligence benefits all of humanity.",
    url: "https://openai.com/",
  },
  google: {
    name: "Google",
    short: "Technology company",
    desc: "Google's mission is to organize the world's information and make it universally accessible and useful.",
    url: "https://gemini.google.com/",
  },
  anthropic: {
    name: "Anthropic",
    short: "AI research company",
    desc: "Anthropic is an American artificial intelligence (AI) startup company, founded by former members of OpenAI.",
    url: "https://www.anthropic.com/",
  },
  mistralai: {
    name: "Mistral AI",
    short: "French AI company",
    desc: "Mistral AI is a French company selling artificial intelligence (AI) products.",
    url: "https://mistral.ai/",
  },
  ollama: {
    name: "Ollama",
    short: "Open-source software",
    desc: "Ollama language model server at a custom API endpoint.",
    url: "https://ollama.com/",
  },
  custom_openai: {
    name: "OpenAI API",
    short: "Custom endpoint",
    desc: "Calls a custom OpenAI API endpoint.",
    url: "https://js.langchain.com/v0.1/docs/integrations/llms/openai/",
  },
  user: {
    name: "User Defined",
    short: "Account → Language Model",
    desc: "Defined by the user in Account Settings → Language Model",
    url: "",
  },
} as const;

interface ValidLanguageModelNameProps {
  model: string | undefined;
  filter: LLMServicesAvailable;
  ollama: string[]; // keys of ollama models
  custom_openai: string[]; // keys of custom openai models
  selectable_llms: string[]; // either empty, or an array stored in the server settings
}

// NOTE: these values must be in sync with the "no" vals in db-schema/site-defaults.ts
const DEFAULT_FILTER: Readonly<LLMServicesAvailable> = {
  openai: false,
  google: false,
  ollama: false,
  mistralai: false,
  anthropic: false,
  custom_openai: false,
  user: false,
} as const;

// this is used in initialization functions, e.g. to get a default model depending on the overall availability
// usually, this should just return the chatgpt3 model, but e.g. if neither google nor openai is available,
// then it might even fall back to an available ollama model. It needs to return a string, though, for the frontend, etc.
export function getValidLanguageModelName({
  model,
  filter = DEFAULT_FILTER,
  ollama,
  custom_openai,
  selectable_llms,
}: ValidLanguageModelNameProps): LanguageModel {
  if (typeof model === "string" && isValidModel(model)) {
    try {
      if (isCoreLanguageModel(model)) {
        const v = model2vendor(model).name;
        if (filter[v] && selectable_llms.includes(model)) {
          return model;
        }
      }

      if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) {
        return model;
      }

      if (
        isCustomOpenAI(model) &&
        custom_openai.includes(fromCustomOpenAIModel(model))
      ) {
        return model;
      }

      if (isUserDefinedModel(model)) {
        return model;
      }
    } catch {}
  }

  for (const free of [true, false]) {
    const dflt = getDefaultLLM(
      selectable_llms,
      filter,
      ollama,
      custom_openai,
      free,
    );
    if (dflt != null) {
      return dflt;
    }
  }
  return DEFAULT_MODEL;
}
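
// Illustrative sketch: if the requested model is unknown or its service is
// disabled, the function falls back to a default. For instance, with only
// Google enabled (inputs below are made up):
//
//   getValidLanguageModelName({
//     model: "gpt-4", // openai is disabled in the filter, so this is rejected
//     filter: { ...DEFAULT_FILTER, google: true },
//     ollama: [],
//     custom_openai: [],
//     selectable_llms: ["gemini-1.5-pro-8k", "gemini-2.0-flash-8k"],
//   });
//   // → "gemini-2.0-flash-8k" (first free selectable model of the highest-priority enabled vendor)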

export const DEFAULT_LLM_PRIORITY: Readonly<UserDefinedLLMService[]> = [
  "google",
  "openai",
  "anthropic",
  "mistralai",
  "ollama",
  "custom_openai",
] as const;

export function getDefaultLLM(
  selectable_llms: string[],
  filter: LLMServicesAvailable,
  ollama?: { [key: string]: any },
  custom_openai?: { [key: string]: any },
  only_free = true,
): LanguageModel {
  for (const v of DEFAULT_LLM_PRIORITY) {
    if (!filter[v]) continue;
    for (const m of USER_SELECTABLE_LLMS_BY_VENDOR[v]) {
      if (selectable_llms.includes(m)) {
        const isFree = LLM_COST[m].free ?? true;
        if ((only_free && isFree) || !only_free) {
          return m;
        }
      }
    }
  }
  // none of the standard models, pick the first ollama or custom_openai
  if (ollama != null && !isEmpty(ollama)) {
    return toOllamaModel(Object.keys(ollama)[0]);
  }
  if (custom_openai != null && !isEmpty(custom_openai)) {
    return toCustomOpenAIModel(Object.keys(custom_openai)[0]);
  }
  return DEFAULT_MODEL;
}
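
// Illustrative sketch: the vendor priority above drives the fallback choice.
// With only Anthropic enabled and its context-limited models selectable
// (made-up inputs), the first free selectable match wins:
//
//   getDefaultLLM(
//     ["claude-3-opus-8k", "claude-3-haiku-8k"],
//     { ...DEFAULT_FILTER, anthropic: true },
//   );
//   // → "claude-3-haiku-8k" (the first selectable Anthropic model that is free)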

export interface OpenAIMessage {
  role: "system" | "user" | "assistant";
  content: string;
}
export type OpenAIMessages = OpenAIMessage[];

export const OLLAMA_PREFIX = "ollama-";
export type OllamaService = string;
export function isOllamaService(service: string): service is OllamaService {
  return isOllamaLLM(service);
}

export const CUSTOM_OPENAI_PREFIX = "custom_openai-";
export type CustomOpenAIService = string;
export function isCustomOpenAIService(
  service: string,
): service is CustomOpenAIService {
  return isCustomOpenAI(service);
}

export const MISTRAL_PREFIX = "mistralai-";
export type MistralService = `${typeof MISTRAL_PREFIX}${MistralModel}`;
export function isMistralService(service: string): service is MistralService {
  return service.startsWith(MISTRAL_PREFIX);
}

export const GOOGLE_PREFIX = "google-";

// we encode the model in the frontend and elsewhere with the service name as a prefix
// ATTN: don't change the encoding pattern of [vendor]-[model]
// for whatever reason, it's also described that way in purchases/close.ts
export type LanguageServiceCore =
  | `${typeof OPENAI_PREFIX}${OpenAIModel}`
  | `${typeof GOOGLE_PREFIX}${
      | "text-bison-001"
      | "chat-bison-001"
      | "embedding-gecko-001"}`
  | `${typeof GOOGLE_PREFIX}${GoogleModel}`
  | AnthropicService
  | MistralService;

export type LanguageService =
  | LanguageServiceCore
  | OllamaService
  | CustomOpenAIService;

// used e.g. for checking "account-id={string}" and other things like that
export const LANGUAGE_MODEL_PREFIXES = [
  "chatgpt",
  ...LANGUAGE_MODEL_SERVICES.map((v) => `${v}-`),
] as const;

// we encode the model in the frontend and elsewhere with the service name as a prefix
export function model2service(model: LanguageModel): LanguageService {
  if (model === "text-embedding-ada-002") {
    return `${OPENAI_PREFIX}${model}`;
  }
  if (
    isOllamaLLM(model) ||
    isCustomOpenAI(model) ||
    isUserDefinedModel(model)
  ) {
    return model; // already has a useful prefix
  }
  if (isMistralModel(model)) {
    return toMistralService(model);
  }
  if (isAnthropicModel(model)) {
    return toAnthropicService(model);
  }
  if (isLanguageModel(model)) {
    if (
      model === "text-bison-001" ||
      model === "chat-bison-001" ||
      model === "embedding-gecko-001" ||
      isGoogleModel(model)
    ) {
      return `${GOOGLE_PREFIX}${model}`;
    } else {
      return `${OPENAI_PREFIX}${model}`;
    }
  }

  throw new Error(`unknown model: ${model}`);
}

// inverse of model2service, but robust for chat avatars, which might not have a prefix
// TODO: fix the mess
export function service2model(
  service: LanguageService | "chatgpt",
): LanguageModel {
  if (service === "chatgpt") {
    return "gpt-3.5-turbo";
  }
  const lm = service2model_core(service);
  if (lm == null) {
    // We don't throw an error, since the frontend would crash
    // throw new Error(`unknown service: ${service}`);
    console.warn(`service2model: unknown service: ${service}`);
    return "gpt-3.5-turbo";
  }
  return lm;
}

export function service2model_core(
  service: LanguageService,
): LanguageModel | null {
  // split off the first part of service, e.g., "openai-" or "google-"
  const s = service.split("-")[0];
  const hasPrefix = LANGUAGE_MODEL_SERVICES.some((v) => s === v);

  if (isUserDefinedModel(service)) {
    return service;
  }

  const m = hasPrefix ? service.split("-").slice(1).join("-") : service;
  if (hasPrefix) {
    // we add the trailing "-" to match with these prefixes, which include the "-"
    switch (`${s}-`) {
      case OLLAMA_PREFIX:
        return toOllamaModel(m);
      case CUSTOM_OPENAI_PREFIX:
        return toCustomOpenAIModel(m);
    }
  }

  if (LANGUAGE_MODELS.includes(m as any)) {
    return m;
  }
  return null;
}
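
// Illustrative sketch of the encode/decode round trip between models and
// services:
//
//   model2service("gpt-4o-8k"); // "openai-gpt-4o-8k"
//   model2service("mistral-large-latest"); // "mistralai-mistral-large-latest"
//   service2model("openai-gpt-4o-8k"); // "gpt-4o-8k"
//   service2model("chatgpt"); // "gpt-3.5-turbo" (legacy avatar name, no prefix)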

// NOTE: do not use this – instead use server_settings.default_llm
export const DEFAULT_MODEL: LanguageModel = "gemini-2.0-flash-8k";

interface LLMVendor {
  name: LLMServiceName;
  url: string;
}

export function model2vendor(model): LLMVendor {
  if (isUserDefinedModel(model)) {
    return { name: "user", url: "" };
  } else if (isOllamaLLM(model)) {
    return { name: "ollama", url: LLM_PROVIDER.ollama.url };
  } else if (isCustomOpenAI(model)) {
    return {
      name: "custom_openai",
      url: LLM_PROVIDER.custom_openai.url,
    };
  } else if (isMistralModel(model)) {
    return { name: "mistralai", url: LLM_PROVIDER.mistralai.url };
  } else if (isOpenAIModel(model)) {
    return { name: "openai", url: LLM_PROVIDER.openai.url };
  } else if (isGoogleModel(model)) {
    return { name: "google", url: LLM_PROVIDER.google.url };
  } else if (isAnthropicModel(model)) {
    return { name: "anthropic", url: LLM_PROVIDER.anthropic.url };
  }

  throw new Error(`model2vendor: unknown model: "${model}"`);
}

// wraps the model name in an object that indicates that it's an ollama model
// TODO: maybe it will be necessary at some point to pass in the list of available ollama models
// TODO: in the future, this object will also contain info like the max tokens and other parameters (from the DB)
export function toOllamaModel(model: string): OllamaLLM {
  if (isOllamaLLM(model)) {
    throw new Error(`already an ollama model: ${model}`);
  }
  return `${OLLAMA_PREFIX}${model}`;
}

// unwraps the model name from an object that indicates that it's an ollama model
export function fromOllamaModel(model: OllamaLLM) {
  if (!isOllamaLLM(model)) {
    throw new Error(`not an ollama model: ${model}`);
  }
  return model.slice(OLLAMA_PREFIX.length);
}

export function isOllamaLLM(model: unknown): model is OllamaLLM {
  return (
    typeof model === "string" &&
    model.startsWith(OLLAMA_PREFIX) &&
    model.length > OLLAMA_PREFIX.length
  );
}

export function toCustomOpenAIModel(model: string): CustomOpenAI {
  if (isCustomOpenAI(model)) {
    throw new Error(`already a custom openai model: ${model}`);
  }
  return `${CUSTOM_OPENAI_PREFIX}${model}`;
}

export function isCustomOpenAI(model: unknown): model is CustomOpenAI {
  return (
    typeof model === "string" &&
    model.startsWith(CUSTOM_OPENAI_PREFIX) &&
    model.length > CUSTOM_OPENAI_PREFIX.length
  );
}

export function fromCustomOpenAIModel(model: CustomOpenAI) {
  if (!isCustomOpenAI(model)) {
    throw new Error(`not a custom openai model: ${model}`);
  }
  return model.slice(CUSTOM_OPENAI_PREFIX.length);
}
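
// Illustrative sketch of the prefix wrappers (model names are made up): the
// wrap/unwrap helpers are strict and throw on double-wrapping or on unwrapping
// a bare name:
//
//   toOllamaModel("llama3"); // "ollama-llama3"
//   fromOllamaModel("ollama-llama3"); // "llama3"
//   isOllamaLLM("ollama-"); // false (a bare prefix is not a model)
//   toCustomOpenAIModel("my-endpoint-model"); // "custom_openai-my-endpoint-model"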

export function toMistralService(model: string): MistralService {
  if (isMistralService(model)) {
    throw new Error(`already a mistral model: ${model}`);
  }
  if (!isMistralModel(model)) {
    throw new Error(`not a mistral model: ${model}`);
  }
  return `${MISTRAL_PREFIX}${model}`;
}

export function fromMistralService(model: MistralService) {
  if (!isMistralService(model)) {
    throw new Error(`not a mistral model: ${model}`);
  }
  return model.slice(MISTRAL_PREFIX.length);
}

type LLM2String = {
  [key in
    | (typeof USER_SELECTABLE_LANGUAGE_MODELS)[number]
    | "chatgpt" // some additional ones, backwards compatibility
    | "chatgpt3"
    | "chatgpt4"
    | "gpt-4-32k"
    | "text-bison-001"
    | "chat-bison-001"]: string;
};

// Map from pseudo account_id to what should be displayed to user.
// This is used in various places in the frontend.
// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
export const LLM_USERNAMES: LLM2String = {
  chatgpt: "GPT-3.5",
  chatgpt3: "GPT-3.5",
  chatgpt4: "GPT-4",
  "gpt-4": "GPT-4",
  "gpt-4-32k": "GPT-4-32k",
  "gpt-3.5-turbo": "GPT-3.5",
  "gpt-3.5-turbo-16k": "GPT-3.5-16k",
  "gpt-4-turbo-preview": "GPT-4 Turbo 128k",
  "gpt-4-turbo-preview-8k": "GPT-4 Turbo",
  "gpt-4-turbo": "GPT-4 Turbo 128k",
  "gpt-4-turbo-8k": "GPT-4 Turbo",
  "gpt-4o": "GPT-4o 128k",
  "gpt-4o-8k": "GPT-4o",
  "gpt-4o-mini": "GPT-4o Mini 128k",
  "gpt-4o-mini-8k": "GPT-4o Mini",
  "gpt-4.1": "GPT-4.1",
  "gpt-4.1-mini": "GPT-4.1 Mini",
  "o1-mini-8k": "OpenAI o1-mini",
  "o1-8k": "OpenAI o1",
  "o1-mini": "OpenAI o1-mini",
  o1: "OpenAI o1",
  "text-embedding-ada-002": "Text Embedding Ada 002", // TODO: this is for embeddings, should be moved to a different place
  "text-bison-001": "PaLM 2",
  "chat-bison-001": "PaLM 2",
  "gemini-pro": "Gemini 1.0 Pro",
  "gemini-1.0-ultra": "Gemini 1.0 Ultra",
  "gemini-1.5-pro": "Gemini 1.5 Pro 1m",
  "gemini-1.5-pro-8k": "Gemini 1.5 Pro",
  "gemini-1.5-flash-8k": "Gemini 1.5 Flash",
  "gemini-2.0-flash-8k": "Gemini 2.0 Flash",
  "gemini-2.0-flash-lite-8k": "Gemini 2.0 Flash Lite",
  "mistral-small-latest": "Mistral AI Small",
  "mistral-medium-latest": "Mistral AI Medium",
  "mistral-large-latest": "Mistral AI Large",
  "claude-3-haiku": "Claude 3 Haiku 200k",
  "claude-3-haiku-8k": "Claude 3 Haiku",
  "claude-3-sonnet": "Claude 3 Sonnet 200k",
  "claude-3-sonnet-4k": "Claude 3 Sonnet",
  "claude-3-5-sonnet": "Claude 3.5 Sonnet 200k",
  "claude-3-5-sonnet-4k": "Claude 3.5 Sonnet",
  "claude-3-opus": "Claude 3 Opus 200k",
  "claude-3-opus-8k": "Claude 3 Opus",
} as const;

// similar to the above, we map to short user-visible description texts
// this comes next to the name, hence you do not have to mention the name
export const LLM_DESCR: LLM2String = {
  chatgpt: "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  chatgpt3: "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  chatgpt4:
    "Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4":
    "Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4.1":
    "Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4-32k": "",
  "gpt-3.5-turbo": "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  "gpt-3.5-turbo-16k": `Same as ${LLM_USERNAMES["gpt-3.5-turbo"]} but with larger 16k token context`,
  "gpt-4-turbo-preview-8k":
    "More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
  "gpt-4-turbo-preview": "Like GPT-4 Turbo, but with up to 128k token context",
  "gpt-4-turbo-8k":
    "Faster, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
  "gpt-4-turbo": "Like GPT-4 Turbo, but with up to 128k token context",
  "gpt-4o-8k":
    "Most powerful, fastest, and cheapest (OpenAI, 8k token context)",
  "gpt-4o": "Most powerful, fastest, and cheapest (OpenAI, 128k token context)",
  "gpt-4o-mini-8k":
    "Most cost-efficient small model (OpenAI, 8k token context)",
  "gpt-4.1-mini": "Most cost-efficient small model (OpenAI, 8k token context)",
  "gpt-4o-mini": "Most cost-efficient small model (OpenAI, 128k token context)",
  "text-embedding-ada-002": "Text embedding Ada 002 by OpenAI", // TODO: this is for embeddings, should be moved to a different place
  "o1-8k": "Spends more time thinking (8k token context)",
  "o1-mini-8k": "A cost-efficient reasoning model (8k token context)",
  o1: "Spends more time thinking (8k token context)",
  "o1-mini": "A cost-efficient reasoning model (8k token context)",
  "text-bison-001": "",
  "chat-bison-001": "",
  "gemini-pro":
    "Google's Gemini 1.0 Pro Generative AI model (30k token context)",
  "gemini-1.0-ultra":
    "Google's Gemini 1.0 Ultra Generative AI model (30k token context)",
  "gemini-1.5-pro":
    "Google's Gemini 1.5 Pro Generative AI model (1m token context)",
  "gemini-1.5-pro-8k":
    "Google's Gemini 1.5 Pro Generative AI model (8k token context)",
  "gemini-1.5-flash-8k":
    "Google's Gemini 1.5 Flash Generative AI model (8k token context)",
  "gemini-2.0-flash-8k":
    "Google's Gemini 2.0 Flash Generative AI model (8k token context)",
  "gemini-2.0-flash-lite-8k":
    "Google's Gemini 2.0 Flash Lite Generative AI model (8k token context)",
  "mistral-small-latest":
    "Fast, simple queries, short answers, less capabilities. (Mistral AI, 4k token context)",
  "mistral-medium-latest":
    "Intermediate tasks, summarizing, generating documents, etc. (Mistral AI, 4k token context)",
  "mistral-large-latest":
    "Most powerful, large reasoning capabilities, but slower. (Mistral AI, 4k token context)",
  "claude-3-haiku":
    "Fastest model, lightweight actions (Anthropic, 200k token context)",
  "claude-3-haiku-8k":
    "Fastest model, lightweight actions (Anthropic, 8k token context)",
  "claude-3-5-sonnet":
    "Anthropic's most intelligent model (Anthropic, 200k token context)",
  "claude-3-5-sonnet-4k":
    "Anthropic's most intelligent model (Anthropic, 4k token context)",
  "claude-3-sonnet":
    "Best combination of performance and speed (Anthropic, 200k token context)",
  "claude-3-sonnet-4k":
    "Best combination of performance and speed (Anthropic, 4k token context)",
  "claude-3-opus":
    "Excels at writing and complex tasks (Anthropic, 200k token context)",
  "claude-3-opus-8k":
    "Excels at writing and complex tasks (Anthropic, 8k token context)",
} as const;

export function isFreeModel(model: unknown, isCoCalcCom: boolean): boolean {
  if (!isCoCalcCom) return true;
  if (isUserDefinedModel(model)) return true;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (typeof model === "string" && LANGUAGE_MODELS.includes(model as any)) {
    // i.e. model is now of type LanguageModelCore
    const costInfo = LLM_COST[model];
    if (costInfo != null) {
      return costInfo.free;
    }
  }
  // all others are free (this should actually never happen, but we're cautious)
  return true;
}
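
// Illustrative sketch: on cocalc.com the "free" flag in LLM_COST decides
// metering, while everything is free on on-prem instances:
//
//   isFreeModel("gpt-4o-mini-8k", true); // true  (LLM_COST entry has free: true)
//   isFreeModel("gpt-4", true); // false (metered)
//   isFreeModel("ollama-llama3", true); // true  (self-hosted models are not metered)
//   isFreeModel("gpt-4", false); // true  (not on cocalc.com)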

// this is used in purchases/get-service-cost
// we only need to check for the vendor prefixes, no special cases!
export function isLanguageModelService(
  service: string,
): service is LanguageService {
  if (isUserDefinedModel(service)) return true;
  for (const v of LANGUAGE_MODEL_SERVICES) {
    if (service.startsWith(`${v}-`)) {
      return true;
    }
  }
  return false;
}

export function getLLMServiceStatusCheckMD(service: LLMServiceName): string {
  switch (service) {
    case "openai":
      return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`;
    case "google":
      return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`;
    case "ollama":
      return `No status information for Ollama available.`;
    case "custom_openai":
      return `No status information for Custom OpenAI available.`;
    case "mistralai":
      return `No status information for Mistral AI available.`;
    case "anthropic":
      return `Anthropic [status](https://status.anthropic.com/).`;
    case "user":
      return `No status information for user defined model available.`;
    default:
      unreachable(service);
  }
  return "";
}

interface Cost {
  prompt_tokens: number;
  completion_tokens: number;
  max_tokens: number;
  free: boolean; // whether this model has a metered paid usage, or is offered for free
}

// price per token for a given price of USD per 1M tokens
function usd1Mtokens(usd: number): number {
  return usd / 1_000_000;
}

// This is the official published cost that openai charges.
// It changes over time, so this will sometimes need to be updated.
// Our cost is a configurable multiple of this.
// https://openai.com/pricing#language-models
// There appears to be no api that provides the prices, unfortunately.
export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
  "gpt-4": {
    prompt_tokens: usd1Mtokens(30),
    completion_tokens: usd1Mtokens(60),
    max_tokens: 8192,
    free: false,
  },
  "gpt-4-32k": {
    prompt_tokens: usd1Mtokens(60),
    completion_tokens: usd1Mtokens(120),
    max_tokens: 32768,
    free: false,
  },
  "gpt-3.5-turbo": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 4096,
    free: true,
  },
  "gpt-3.5-turbo-16k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 16384,
    free: false,
  },
  // like above, but we limit the tokens to reduce how much money the user has to commit to
  "gpt-4-turbo-preview-8k": {
    prompt_tokens: usd1Mtokens(10),
    completion_tokens: usd1Mtokens(30),
    max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
    free: false,
  },
  "gpt-4-turbo-preview": {
    prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens
    completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  }, // like above, but we limit the tokens to reduce how much money the user has to commit to
  "gpt-4-turbo-8k": {
    prompt_tokens: usd1Mtokens(10),
    completion_tokens: usd1Mtokens(30),
    max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
    free: false,
  },
  "gpt-4-turbo": {
    prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens
    completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  },
  "gpt-4.1": {
    prompt_tokens: usd1Mtokens(2),
    completion_tokens: usd1Mtokens(8),
    max_tokens: 8192,
    free: false,
  },
  "gpt-4.1-mini": {
    prompt_tokens: usd1Mtokens(0.4),
    completion_tokens: usd1Mtokens(1.6),
    max_tokens: 8192,
    free: true,
  },
  "gpt-4o-8k": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "gpt-4o": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  },
  "gpt-4o-mini-8k": {
    prompt_tokens: usd1Mtokens(0.15),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: true,
  },
  "gpt-4o-mini": {
    prompt_tokens: usd1Mtokens(0.15),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: true,
  },
  o1: {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(7.5),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "o1-mini": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(1.5),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "o1-8k": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(7.5),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "o1-mini-8k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(1.5),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  // also OpenAI
  "text-embedding-ada-002": {
    prompt_tokens: 0.0001 / 1000,
    completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings
    max_tokens: 8191,
    free: false,
  },
  // https://ai.google.dev/pricing
  "gemini-pro": {
    prompt_tokens: usd1Mtokens(0.5),
    completion_tokens: usd1Mtokens(1.5),
    max_tokens: 30720,
    free: true,
  },
  "gemini-1.5-pro-8k": {
    prompt_tokens: usd1Mtokens(1.25), // (we're below the 128k context)
    completion_tokens: usd1Mtokens(5),
    max_tokens: 8_000,
    free: false,
  },
  "gemini-1.5-pro": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 1048576,
    free: false,
  },
  "gemini-1.0-ultra": {
    prompt_tokens: usd1Mtokens(1), // TODO: price not yet known!
    completion_tokens: usd1Mtokens(1),
    max_tokens: 30720,
    free: true,
  },
  "gemini-1.5-flash-8k": {
    prompt_tokens: usd1Mtokens(0.075),
    completion_tokens: usd1Mtokens(0.3),
    max_tokens: 8_000,
    free: true,
  },
  // https://ai.google.dev/gemini-api/docs/pricing?hl=de
  "gemini-2.0-flash-8k": {
    prompt_tokens: usd1Mtokens(0.1),
    completion_tokens: usd1Mtokens(0.4),
    max_tokens: 8_000,
    free: true,
  },
  "gemini-2.0-flash-lite-8k": {
    prompt_tokens: usd1Mtokens(0.075),
    completion_tokens: usd1Mtokens(0.3),
    max_tokens: 8_000,
    free: true,
  },
  // https://mistral.ai/technology/
  "mistral-small-latest": {
    prompt_tokens: usd1Mtokens(0.2),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  "mistral-medium-latest": {
    prompt_tokens: usd1Mtokens(2.7),
    completion_tokens: usd1Mtokens(8.1),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  "mistral-large-latest": {
    prompt_tokens: usd1Mtokens(2),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: false,
  },
  // Anthropic: pricing somewhere on that page: https://www.anthropic.com/api
  "claude-3-opus-8k": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 8_000, // limited to 8k tokens, to reduce the necessary spend limit to commit to
    free: false,
  },
  "claude-3-opus": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-5-sonnet": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-5-sonnet-4k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 4_000, // limited to 4k tokens
    free: false,
  },
  "claude-3-sonnet-4k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 4_000, // limited to 4k tokens, offered for free
    free: true,
  },
  "claude-3-sonnet": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-haiku-8k": {
    prompt_tokens: usd1Mtokens(0.8),
    completion_tokens: usd1Mtokens(4),
    max_tokens: 8_000, // limited to 8k tokens, offered for free
    free: true,
  },
  "claude-3-haiku": {
    prompt_tokens: usd1Mtokens(0.8),
    completion_tokens: usd1Mtokens(4),
    max_tokens: 200_000,
    free: false,
  },
} as const;

// TODO: remove this test – it's only used server side, and that server side check should work for all known LLM models
export function isValidModel(model?: string): boolean {
  if (model == null) return false;
  if (isUserDefinedModel(model)) return true;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (isMistralModel(model)) return true;
  if (isGoogleModel(model)) return true;
  return LLM_COST[model ?? ""] != null;
}

export function getMaxTokens(model?: LanguageModel): number {
  // TODO: store max tokens in the model object itself, this is just a fallback
  if (isOllamaLLM(model)) return 8192;
  if (isMistralModel(model)) return 4096; // TODO: check with MistralAI
  return LLM_COST[model ?? ""]?.max_tokens ?? 4096;
}
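
// Illustrative sketch of the fallbacks:
//
//   getMaxTokens("claude-3-opus-8k"); // 8000 (from LLM_COST)
//   getMaxTokens("ollama-llama3"); // 8192 (hardcoded Ollama fallback)
//   getMaxTokens("mistral-large-latest"); // 4096
//   getMaxTokens(); // 4096 (default)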

export interface LLMCost {
  prompt_tokens: number;
  completion_tokens: number;
}

export function getLLMCost(
  model: LanguageModelCore,
  markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.3
): LLMCost {
  const x = LLM_COST[model];
  if (x == null) {
    throw Error(`unknown model "${model}"`);
  }
  const { prompt_tokens, completion_tokens } = x;
  if (markup_percentage < 0) {
    throw Error("markup percentage can't be negative");
  }
  const f = 1 + markup_percentage / 100;
  return {
    prompt_tokens: prompt_tokens * f,
    completion_tokens: completion_tokens * f,
  };
}
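
// Illustrative worked example: for "gpt-4o-8k" ($2.50 / 1M prompt tokens and
// $10 / 1M completion tokens) a 30% markup yields these per-token rates:
//
//   getLLMCost("gpt-4o-8k", 30);
//   // { prompt_tokens: 0.00000325, completion_tokens: 0.000013 }
//   // i.e. 2.5e-6 * 1.3 and 10e-6 * 1.3 dollars per token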

const priceRangeCache = new LRU<string, ReturnType<typeof getLLMPriceRange>>({
  max: 10,
});

export function getLLMPriceRange(
  prompt: number,
  output: number,
  markup_percentage: number,
): { min: number; max: number } {
  const cacheKey = `${prompt}::${output}::${markup_percentage}`;
  const cached = priceRangeCache.get(cacheKey);
  if (cached) return cached;

  let min = Infinity;
  let max = 0;
  for (const key in LLM_COST) {
    const model = LLM_COST[key];
    if (!model || isFreeModel(key, true)) continue;
    const { prompt_tokens, completion_tokens } = getLLMCost(
      key as LanguageModelCore,
      markup_percentage,
    );
    const p = prompt * prompt_tokens + output * completion_tokens;

    min = Math.min(min, p);
    max = Math.max(max, p);
  }
  const ret = { min, max };
  priceRangeCache.set(cacheKey, ret);
  return ret;
}

// The maximum cost for one single call using the given model.
// We can't know the cost until after it happens, so this bound is useful for
// ensuring the user can afford to make a call.
export function getMaxCost(
  model: LanguageModelCore,
  markup_percentage: number,
): number {
  const { prompt_tokens, completion_tokens } = getLLMCost(
    model,
    markup_percentage,
  );
  const { max_tokens } = LLM_COST[model];
  return Math.max(prompt_tokens, completion_tokens) * max_tokens;
}
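
// Illustrative worked example: for "gpt-4" with a 30% markup the marked-up
// rates are 3.9e-5 (prompt) and 7.8e-5 (completion) dollars per token, and
// max_tokens is 8192, so the upper bound for a single call is
//
//   getMaxCost("gpt-4", 30); // 7.8e-5 * 8192 ≈ $0.64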

/**
 * Initially, we just had one system prompt for all LLMs.
 * This was tuned for the ChatGPTs by OpenAI, but breaks down for others.
 * For example, Gemini and Mistral are confused by mentioning "CoCalc" and insert code cells for all kinds of questions.
 */
export function getSystemPrompt(
  model: LanguageModel,
  _path: string | undefined,
) {
  // TODO: for now, path is ignored. We might want to use it to customize the prompt in the future.
  const common = "Be brief.";
  const math = "Enclose any math formulas in $.";

  if (
    model2vendor(model).name === "openai" ||
    model.startsWith(OPENAI_PREFIX)
  ) {
    const mdCode =
      "Include the language directly after the triple backticks in all markdown code blocks.";
    return `Assume full access to CoCalc and using CoCalc right now.\n${mdCode}\n${math}\n${common}`;
  }

  // mistral stupidly inserts anything mentioned in the prompt as examples, always.
  if (
    model2vendor(model).name === "mistralai" ||
    model.startsWith(MISTRAL_PREFIX)
  ) {
    return common;
  }

  if (
    model2vendor(model).name === "google" ||
    model.startsWith(GOOGLE_PREFIX)
  ) {
    return `${math}\n${common}`;
  }

  if (
    model2vendor(model).name === "ollama" ||
    model.startsWith(OLLAMA_PREFIX)
  ) {
    return `${common}`;
  }

  if (
    model2vendor(model).name === "anthropic" ||
    model.startsWith(ANTHROPIC_PREFIX)
  ) {
    return `${math}\n${common}`;
  }

  const mdCode = `Any code blocks in triple backticks should mention the language after the first backticks. For example \`\`\`python\nprint("Hello, World!")\n\`\`\``;
  return `${mdCode}\n${math}\n${common}`;
}
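
// Illustrative sketch of the per-vendor prompts:
//
//   getSystemPrompt("gemini-2.0-flash-8k", undefined);
//   // "Enclose any math formulas in $.\nBe brief."
//   getSystemPrompt("mistral-large-latest", undefined);
//   // "Be brief."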