Path: blob/master/src/packages/util/db-schema/llm-utils.ts
// this contains bits and pieces from the wrongly named openai.ts file

import { isEmpty } from "lodash";
import LRU from "lru-cache";

import { unreachable } from "@cocalc/util/misc";

// these can be defined by admins and users
export const SERVICES = [
  "openai",
  "google",
  "mistralai", // the "*ai" suffix is deliberate, because their model names start with "mistral-..." and we have to distinguish it from the prefix
  "anthropic",
  "ollama",
  "custom_openai",
] as const;

// a "user-*" model is a wrapper for all the model services
export const LANGUAGE_MODEL_SERVICES = [...SERVICES, "user"] as const;

export type UserDefinedLLMService = (typeof SERVICES)[number];

export function isUserDefinedModelType(
  model: unknown,
): model is UserDefinedLLMService {
  return SERVICES.includes(model as any);
}

// "User LLMs" are defined in the user's account settings.
// They query an external LLM service of given type, endpoint, and API key.
export interface UserDefinedLLM {
  id: number; // a unique number
  service: UserDefinedLLMService;
  model: string; // non-empty string
  display: string; // short user-visible string
  endpoint: string; // URL to the LLM service
  apiKey: string;
  icon?: string; // https://.../...png
}

export const USER_LLM_PREFIX = "user-";

// This basically prefixes the "model" defined by the user with the USER and service prefix.
// We do not use the to*() functions, because the names of the models could be arbitrary for each service.
export function toUserLLMModelName(llm: UserDefinedLLM) {
  const { service } = llm;
  const model: string = (() => {
    switch (service) {
      case "custom_openai":
        return `${CUSTOM_OPENAI_PREFIX}${llm.model}`;
      case "ollama":
        return toOllamaModel(llm.model);
      case "anthropic":
        return `${ANTHROPIC_PREFIX}${llm.model}`;
      case "google":
        return `${GOOGLE_PREFIX}${llm.model}`;
      case "mistralai":
        return `${MISTRAL_PREFIX}${llm.model}`;
      case "openai":
        return `${OPENAI_PREFIX}${llm.model}`;
      default:
        unreachable(service);
        throw new Error(
          `toUserLLMModelName of service ${service} not supported`,
        );
    }
  })();
  return `${USER_LLM_PREFIX}${model}`;
}

export function fromUserDefinedLLMModel(m: string): string | null {
  if (isUserDefinedModel(m)) {
    return m.slice(USER_LLM_PREFIX.length);
  }
  return null;
}

export function isUserDefinedModel(model: unknown): boolean {
  if (typeof model !== "string") return false;
  if (model.startsWith(USER_LLM_PREFIX)) {
    const m2 = model.slice(USER_LLM_PREFIX.length);
    return SERVICES.some((svc) => m2.startsWith(`${svc}-`));
  }
  return false;
}

export function unpackUserDefinedLLMModel(model: string): {
  service: UserDefinedLLMService;
  model: string;
} | null {
  const um = fromUserDefinedLLMModel(model);
  if (um === null) return null;
  for (const service of SERVICES) {
    if (um.startsWith(`${service}-`)) {
      return { service, model: um.slice(service.length + 1) };
    }
  }
  return null;
}

export const OPENAI_PREFIX = "openai-";
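// Example (illustrative, not part of the original file): a user-defined
// Ollama entry with the hypothetical model name "llama3" round-trips as
//
//   toUserLLMModelName({ service: "ollama", model: "llama3", ... })
//     === "user-ollama-llama3"
//   unpackUserDefinedLLMModel("user-ollama-llama3")
//     === { service: "ollama", model: "llama3" }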
// NOTE: all arrays of model names should be ordered from the "simplest and fastest" to the "most complex, slowest, and most expensive" –
// that way, the ordering in the UI doesn't look arbitrary, but follows a clear logic

export const MODELS_OPENAI = [
  "gpt-3.5-turbo",
  "gpt-4o-mini-8k", // context limited
  "gpt-4o-mini", // Released 2024-07-18
  "gpt-4o-8k", // context limited, similar to gpt-4-turbo-8k
  "gpt-4o", // Released 2024-05-13
  // the "preview" variants are disabled, because the preview is over
  "gpt-4-turbo-preview-8k", // like below, but artificially limited to 8k tokens
  "gpt-4-turbo-preview",
  "gpt-4-turbo-8k", // Released 2024-04-11
  "gpt-4-turbo",
  "gpt-4",
  "gpt-4.1",
  "gpt-4.1-mini",
  "gpt-4-32k",
  "gpt-3.5-turbo-16k",
  "text-embedding-ada-002", // TODO: this is for embeddings, should be moved to a different place
  "o1-mini-8k",
  "o1-mini",
  "o1-8k",
  "o1",
] as const;

export type OpenAIModel = (typeof MODELS_OPENAI)[number];

export function isOpenAIModel(model: unknown): model is OpenAIModel {
  return MODELS_OPENAI.includes(model as any);
}

// ATTN: when you modify this list, also change frontend/.../llm/llm-selector.tsx!
export const MISTRAL_MODELS = [
  // yes, all 3 of them have an extra mistral-prefix, on top of the vendor prefix
  "mistral-small-latest",
  "mistral-medium-latest", // Deprecated!
  "mistral-large-latest",
] as const;

export type MistralModel = (typeof MISTRAL_MODELS)[number];

export function isMistralModel(model: unknown): model is MistralModel {
  return MISTRAL_MODELS.includes(model as any);
}

// google's are taken from here – we use the generative AI client lib
// https://developers.generativeai.google/models/language
// $ curl -s "https://generativelanguage.googleapis.com/v1beta/models?key=$GOOGLE_GENAI" | jq
export const GOOGLE_MODELS = [
  "gemini-1.5-flash-8k", // introduced 2024-05-15
  "gemini-pro", // Discontinued Feb'25. Keep it to avoid breaking old references!
  "gemini-1.0-ultra", // hangs
  "gemini-1.5-pro-8k", // works now with langchain
  "gemini-1.5-pro", // works now with langchain
  "gemini-2.0-flash-8k",
  "gemini-2.0-flash-lite-8k",
] as const;
export type GoogleModel = (typeof GOOGLE_MODELS)[number];
export function isGoogleModel(model: unknown): model is GoogleModel {
  return GOOGLE_MODELS.includes(model as any);
}
export const GOOGLE_MODEL_TO_ID: Partial<{ [m in GoogleModel]: string }> = {
  "gemini-1.5-pro": "gemini-1.5-pro-latest",
  "gemini-1.5-pro-8k": "gemini-1.5-pro-latest",
  "gemini-1.5-flash-8k": "gemini-1.5-flash-latest",
  "gemini-2.0-flash-8k": "gemini-2.0-flash",
  "gemini-2.0-flash-lite-8k": "gemini-2.0-flash-lite",
} as const;
// https://docs.anthropic.com/claude/docs/models-overview -- stable names for the models ...
export const ANTHROPIC_MODELS = [
  "claude-3-5-sonnet",
  "claude-3-5-sonnet-4k", // added 2024-06-24
  "claude-3-haiku",
  "claude-3-haiku-8k", // limited context window, offered for free
  "claude-3-sonnet",
  "claude-3-sonnet-4k", // limited context window, offered for free
  "claude-3-opus-8k", // same issue as the large GPT models, limit the context window to limit spending
  "claude-3-opus",
] as const;
const CLAUDE_SONNET_VERSION = "20240229";
const CLAUDE_HAIKU_VERSION = "20240307";
const CLAUDE_OPUS_VERSION = "20240229";
const CLAUDE_SONNET_3_5_VERSION = "20240620";
// ... and we add a version number (there is no "*-latest") when dispatching on the backend
export const ANTHROPIC_VERSION: { [name in AnthropicModel]: string } = {
  "claude-3-sonnet-4k": CLAUDE_SONNET_VERSION,
  "claude-3-opus": CLAUDE_OPUS_VERSION,
  "claude-3-opus-8k": CLAUDE_OPUS_VERSION,
  "claude-3-sonnet": CLAUDE_SONNET_VERSION,
  "claude-3-5-sonnet": CLAUDE_SONNET_3_5_VERSION,
  "claude-3-5-sonnet-4k": CLAUDE_SONNET_3_5_VERSION,
  "claude-3-haiku": CLAUDE_HAIKU_VERSION,
  "claude-3-haiku-8k": CLAUDE_HAIKU_VERSION,
} as const;
export const ANTHROPIC_PREFIX = "anthropic-";
export type AnthropicModel = (typeof ANTHROPIC_MODELS)[number];
type AnthropicService = `${typeof ANTHROPIC_PREFIX}${AnthropicModel}`;
export function isAnthropicModel(model: unknown): model is AnthropicModel {
  return ANTHROPIC_MODELS.includes(model as any);
}
export function toAnthropicService(model: AnthropicModel): AnthropicService {
  return `${ANTHROPIC_PREFIX}${model}`;
}
export function isAnthropicService(
  service: string,
): service is AnthropicService {
  return service.startsWith(ANTHROPIC_PREFIX);
}
export function fromAnthropicService(
  service: AnthropicService,
): AnthropicModel {
  if (!isAnthropicService(service)) {
    throw new Error(`not an anthropic service: ${service}`);
  }
  return service.slice(ANTHROPIC_PREFIX.length) as AnthropicModel;
}
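// Example (illustrative): the Anthropic helpers compose the frontend name and
// the backend dispatch name like this:
//
//   toAnthropicService("claude-3-haiku") === "anthropic-claude-3-haiku"
//   fromAnthropicService("anthropic-claude-3-haiku") === "claude-3-haiku"
//
// and on the backend, ANTHROPIC_VERSION["claude-3-haiku"] === "20240307"
// would yield the pinned API model name "claude-3-haiku-20240307".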
// the hardcoded list of available language models – there are also dynamic ones, like OllamaLLM objects
export const LANGUAGE_MODELS = [
  ...MODELS_OPENAI,
  ...MISTRAL_MODELS,
  ...GOOGLE_MODELS,
  ...ANTHROPIC_MODELS,
] as const;

export const USER_SELECTABLE_LLMS_BY_VENDOR: {
  [vendor in LLMServiceName]: Readonly<LanguageModelCore[]>;
} = {
  openai: MODELS_OPENAI.filter(
    (m) =>
      m === "gpt-4" ||
      m === "gpt-4-turbo-preview-8k" ||
      m === "gpt-4o-8k" ||
      m === "gpt-4o-mini-8k" ||
      m === "gpt-4.1" ||
      m === "gpt-4.1-mini",

    // ATTN: there is code for o1 and o1-mini, but it does not work yet.
    // The API changed, there is no support for streaming, and it already took
    // too much of my time trying to get it to work.
    // m === "o1-mini-8k" ||
    // m === "o1-8k",
  ),
  google: GOOGLE_MODELS.filter(
    (m) =>
      // we only enable models with a limited context window.
      m === "gemini-1.5-pro-8k" ||
      //m === "gemini-1.5-flash-8k" ||
      m === "gemini-2.0-flash-8k" ||
      m === "gemini-2.0-flash-lite-8k",
  ),
  mistralai: MISTRAL_MODELS.filter((m) => m !== "mistral-medium-latest"),
  anthropic: ANTHROPIC_MODELS.filter((m) => {
    // we show opus and the context restricted models (to avoid high costs)
    return (
      m === "claude-3-opus-8k" ||
      m === "claude-3-5-sonnet-4k" ||
      m === "claude-3-haiku-8k"
    );
  }),
  ollama: [], // this is empty, because these models are not hardcoded
  custom_openai: [], // this is empty, because these models are not hardcoded
  user: [],
} as const;

// This hardcodes which models can be selected by users – refine this by setting site_settings.selectable_llms!
// Make sure to update this when adding new models.
// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx
export const USER_SELECTABLE_LANGUAGE_MODELS = [
  ...USER_SELECTABLE_LLMS_BY_VENDOR.openai,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.google,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.mistralai,
  ...USER_SELECTABLE_LLMS_BY_VENDOR.anthropic,
] as const;

export type OllamaLLM = string;
export type CustomOpenAI = string;

// use the one without Ollama to get stronger typing. Ollama could be any string starting with the OLLAMA_PREFIX.
export type LanguageModelCore = (typeof LANGUAGE_MODELS)[number];
export type LanguageModel = LanguageModelCore | OllamaLLM;
export function isCoreLanguageModel(
  model: unknown,
): model is LanguageModelCore {
  if (typeof model !== "string") return false;
  return LANGUAGE_MODELS.includes(model as any);
}

// we check if the given object is any known language model
export function isLanguageModel(model?: unknown): model is LanguageModel {
  if (model == null) return false;
  if (typeof model !== "string") return false;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (isUserDefinedModel(model)) return true; // this also checks if there is a valid model inside
  return LANGUAGE_MODELS.includes(model as any);
}
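// Example (illustrative): the difference between the two guards is that
// isCoreLanguageModel only accepts hardcoded names, while isLanguageModel
// also accepts dynamic ones:
//
//   isCoreLanguageModel("gpt-4") === true
//   isCoreLanguageModel("ollama-llama3") === false  // "llama3" is hypothetical
//   isLanguageModel("ollama-llama3") === true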
export type LLMServiceName = (typeof LANGUAGE_MODEL_SERVICES)[number];

export function isLLMServiceName(service: unknown): service is LLMServiceName {
  if (typeof service !== "string") return false;
  return LANGUAGE_MODEL_SERVICES.includes(service as any);
}

export type LLMServicesAvailable = Record<LLMServiceName, boolean>;

interface LLMService {
  name: string;
  short: string; // additional short text next to the company name
  desc: string; // more detailed description
  url: string;
}

export const LLM_PROVIDER: { [key in LLMServiceName]: LLMService } = {
  openai: {
    name: "OpenAI",
    short: "AI research and deployment company",
    desc: "OpenAI is an AI research and deployment company. Their mission is to ensure that artificial general intelligence benefits all of humanity.",
    url: "https://openai.com/",
  },
  google: {
    name: "Google",
    short: "Technology company",
    desc: "Google's mission is to organize the world's information and make it universally accessible and useful.",
    url: "https://gemini.google.com/",
  },
  anthropic: {
    name: "Anthropic",
    short: "AI research company",
    desc: "Anthropic is an American artificial intelligence (AI) startup company, founded by former members of OpenAI.",
    url: "https://www.anthropic.com/",
  },
  mistralai: {
    name: "Mistral AI",
    short: "French AI company",
    desc: "Mistral AI is a French company selling artificial intelligence (AI) products.",
    url: "https://mistral.ai/",
  },
  ollama: {
    name: "Ollama",
    short: "Open-source software",
    desc: "Ollama language model server at a custom API endpoint.",
    url: "https://ollama.com/",
  },
  custom_openai: {
    name: "OpenAI API",
    short: "Custom endpoint",
    desc: "Calls a custom OpenAI API endpoint.",
    url: "https://js.langchain.com/v0.1/docs/integrations/llms/openai/",
  },
  user: {
    name: "User Defined",
    short: "Account → Language Model",
    desc: "Defined by the user in Account Settings → Language Model",
    url: "",
  },
} as const;
interface ValidLanguageModelNameProps {
  model: string | undefined;
  filter: LLMServicesAvailable;
  ollama: string[]; // keys of ollama models
  custom_openai: string[]; // keys of custom openai models
  selectable_llms: string[]; // either empty, or an array stored in the server settings
}

// NOTE: these values must be in sync with the "no" vals in db-schema/site-defaults.ts
const DEFAULT_FILTER: Readonly<LLMServicesAvailable> = {
  openai: false,
  google: false,
  ollama: false,
  mistralai: false,
  anthropic: false,
  custom_openai: false,
  user: false,
} as const;

// this is used in initialization functions, e.g. to get a default model depending on the overall availability
// usually, this should just return the chatgpt3 model, but e.g. if neither google nor openai is available,
// then it might even fall back to an available ollama model. It needs to return a string, though, for the frontend, etc.
export function getValidLanguageModelName({
  model,
  filter = DEFAULT_FILTER,
  ollama,
  custom_openai,
  selectable_llms,
}: ValidLanguageModelNameProps): LanguageModel {
  if (typeof model === "string" && isValidModel(model)) {
    try {
      if (isCoreLanguageModel(model)) {
        const v = model2vendor(model).name;
        if (filter[v] && selectable_llms.includes(model)) {
          return model;
        }
      }

      if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) {
        return model;
      }

      if (
        isCustomOpenAI(model) &&
        custom_openai.includes(fromCustomOpenAIModel(model))
      ) {
        return model;
      }

      if (isUserDefinedModel(model)) {
        return model;
      }
    } catch {}
  }

  for (const free of [true, false]) {
    const dflt = getDefaultLLM(
      selectable_llms,
      filter,
      ollama,
      custom_openai,
      free,
    );
    if (dflt != null) {
      return dflt;
    }
  }
  return DEFAULT_MODEL;
}

export const DEFAULT_LLM_PRIORITY: Readonly<UserDefinedLLMService[]> = [
  "google",
  "openai",
  "anthropic",
  "mistralai",
  "ollama",
  "custom_openai",
] as const;

export function getDefaultLLM(
  selectable_llms: string[],
  filter: LLMServicesAvailable,
  ollama?: { [key: string]: any },
  custom_openai?: { [key: string]: any },
  only_free = true,
): LanguageModel {
  for (const v of DEFAULT_LLM_PRIORITY) {
    if (!filter[v]) continue;
    for (const m of USER_SELECTABLE_LLMS_BY_VENDOR[v]) {
      if (selectable_llms.includes(m)) {
        const isFree = LLM_COST[m].free ?? true;
        if ((only_free && isFree) || !only_free) {
          return m;
        }
      }
    }
  }
  // none of the standard models, pick the first ollama or custom_openai
  if (ollama != null && !isEmpty(ollama)) {
    return toOllamaModel(Object.keys(ollama)[0]);
  }
  if (custom_openai != null && !isEmpty(custom_openai)) {
    return toCustomOpenAIModel(Object.keys(custom_openai)[0]);
  }
  return DEFAULT_MODEL;
}
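// Example (illustrative): if only Google is enabled and the requested model
// is not available, the fallback walks DEFAULT_LLM_PRIORITY and returns the
// first enabled, selectable (and preferably free) model:
//
//   getValidLanguageModelName({
//     model: "gpt-4", // openai is disabled in the filter below
//     filter: { ...DEFAULT_FILTER, google: true },
//     ollama: [],
//     custom_openai: [],
//     selectable_llms: ["gemini-2.0-flash-8k"],
//   }) === "gemini-2.0-flash-8k"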
export interface OpenAIMessage {
  role: "system" | "user" | "assistant";
  content: string;
}
export type OpenAIMessages = OpenAIMessage[];

export const OLLAMA_PREFIX = "ollama-";
export type OllamaService = string;
export function isOllamaService(service: string): service is OllamaService {
  return isOllamaLLM(service);
}

export const CUSTOM_OPENAI_PREFIX = "custom_openai-";
export type CustomOpenAIService = string;
export function isCustomOpenAIService(
  service: string,
): service is CustomOpenAIService {
  return isCustomOpenAI(service);
}

export const MISTRAL_PREFIX = "mistralai-";
export type MistralService = `${typeof MISTRAL_PREFIX}${MistralModel}`;
export function isMistralService(service: string): service is MistralService {
  return service.startsWith(MISTRAL_PREFIX);
}

export const GOOGLE_PREFIX = "google-";

// we encode the model in the frontend and elsewhere with the service name as a prefix
// ATTN: don't change the encoding pattern of [vendor]-[model]
// for whatever reason, it's also described that way in purchases/close.ts
export type LanguageServiceCore =
  | `${typeof OPENAI_PREFIX}${OpenAIModel}`
  | `${typeof GOOGLE_PREFIX}${
      | "text-bison-001"
      | "chat-bison-001"
      | "embedding-gecko-001"}`
  | `${typeof GOOGLE_PREFIX}${GoogleModel}`
  | AnthropicService
  | MistralService;

export type LanguageService =
  | LanguageServiceCore
  | OllamaService
  | CustomOpenAIService;

// used e.g. for checking "account-id={string}" and other things like that
export const LANGUAGE_MODEL_PREFIXES = [
  "chatgpt",
  ...LANGUAGE_MODEL_SERVICES.map((v) => `${v}-`),
] as const;

// we encode the model in the frontend and elsewhere with the service name as a prefix
export function model2service(model: LanguageModel): LanguageService {
  if (model === "text-embedding-ada-002") {
    return `${OPENAI_PREFIX}${model}`;
  }
  if (
    isOllamaLLM(model) ||
    isCustomOpenAI(model) ||
    isUserDefinedModel(model)
  ) {
    return model; // already has a useful prefix
  }
  if (isMistralModel(model)) {
    return toMistralService(model);
  }
  if (isAnthropicModel(model)) {
    return toAnthropicService(model);
  }
  if (isLanguageModel(model)) {
    if (
      model === "text-bison-001" ||
      model === "chat-bison-001" ||
      model === "embedding-gecko-001" ||
      isGoogleModel(model)
    ) {
      return `${GOOGLE_PREFIX}${model}`;
    } else {
      return `${OPENAI_PREFIX}${model}`;
    }
  }

  throw new Error(`unknown model: ${model}`);
}
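// Example (illustrative): the [vendor]-[model] encoding in action:
//
//   model2service("mistral-large-latest") === "mistralai-mistral-large-latest"
//   model2service("gpt-4o") === "openai-gpt-4o"
//   model2service("gemini-1.5-pro") === "google-gemini-1.5-pro"
//
// Ollama, custom OpenAI, and user-defined models already carry their prefix
// and pass through unchanged.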
// inverse of model2service, but robust for chat avatars, which might not have a prefix
// TODO: fix the mess
export function service2model(
  service: LanguageService | "chatgpt",
): LanguageModel {
  if (service === "chatgpt") {
    return "gpt-3.5-turbo";
  }
  const lm = service2model_core(service);
  if (lm == null) {
    // We don't throw an error, since the frontend would crash
    // throw new Error(`unknown service: ${service}`);
    console.warn(`service2model: unknown service: ${service}`);
    return "gpt-3.5-turbo";
  }
  return lm;
}

export function service2model_core(
  service: LanguageService,
): LanguageModel | null {
  // split off the first part of service, e.g., "openai-" or "google-"
  const s = service.split("-")[0];
  const hasPrefix = LANGUAGE_MODEL_SERVICES.some((v) => s === v);

  if (isUserDefinedModel(service)) {
    return service;
  }

  const m = hasPrefix ? service.split("-").slice(1).join("-") : service;
  if (hasPrefix) {
    // we add the trailing "-" to match with these prefixes, which include the "-"
    switch (`${s}-`) {
      case OLLAMA_PREFIX:
        return toOllamaModel(m);
      case CUSTOM_OPENAI_PREFIX:
        return toCustomOpenAIModel(m);
    }
  }

  if (LANGUAGE_MODELS.includes(m as any)) {
    return m;
  }
  return null;
}

// NOTE: do not use this – instead use server_settings.default_llm
export const DEFAULT_MODEL: LanguageModel = "gemini-2.0-flash-8k";

interface LLMVendor {
  name: LLMServiceName;
  url: string;
}

export function model2vendor(model): LLMVendor {
  if (isUserDefinedModel(model)) {
    return { name: "user", url: "" };
  } else if (isOllamaLLM(model)) {
    return { name: "ollama", url: LLM_PROVIDER.ollama.url };
  } else if (isCustomOpenAI(model)) {
    return {
      name: "custom_openai",
      url: LLM_PROVIDER.custom_openai.url,
    };
  } else if (isMistralModel(model)) {
    return { name: "mistralai", url: LLM_PROVIDER.mistralai.url };
  } else if (isOpenAIModel(model)) {
    return { name: "openai", url: LLM_PROVIDER.openai.url };
  } else if (isGoogleModel(model)) {
    return { name: "google", url: LLM_PROVIDER.google.url };
  } else if (isAnthropicModel(model)) {
    return { name: "anthropic", url: LLM_PROVIDER.anthropic.url };
  }

  throw new Error(`model2vendor: unknown model: "${model}"`);
}

// wraps the model name with a prefix that indicates that it's an ollama model
// TODO: maybe it will be necessary at some point to pass in the list of available ollama models
// TODO: in the future, this object will also contain info like the max tokens and other parameters (from the DB)
export function toOllamaModel(model: string): OllamaLLM {
  if (isOllamaLLM(model)) {
    throw new Error(`already an ollama model: ${model}`);
  }
  return `${OLLAMA_PREFIX}${model}`;
}

// strips the prefix that indicates that it's an ollama model
export function fromOllamaModel(model: OllamaLLM) {
  if (!isOllamaLLM(model)) {
    throw new Error(`not an ollama model: ${model}`);
  }
  return model.slice(OLLAMA_PREFIX.length);
}

export function isOllamaLLM(model: unknown): model is OllamaLLM {
  return (
    typeof model === "string" &&
    model.startsWith(OLLAMA_PREFIX) &&
    model.length > OLLAMA_PREFIX.length
  );
}

export function toCustomOpenAIModel(model: string): CustomOpenAI {
  if (isCustomOpenAI(model)) {
    throw new Error(`already a custom openai model: ${model}`);
  }
  return `${CUSTOM_OPENAI_PREFIX}${model}`;
}

export function isCustomOpenAI(model: unknown): model is CustomOpenAI {
  return (
    typeof model === "string" &&
    model.startsWith(CUSTOM_OPENAI_PREFIX) &&
    model.length > CUSTOM_OPENAI_PREFIX.length
  );
}

export function fromCustomOpenAIModel(model: CustomOpenAI) {
  if (!isCustomOpenAI(model)) {
    throw new Error(`not a custom openai model: ${model}`);
  }
  return model.slice(CUSTOM_OPENAI_PREFIX.length);
}

export function toMistralService(model: string): MistralService {
  if (isMistralService(model)) {
    throw new Error(`already a mistral model: ${model}`);
  }
  if (!isMistralModel(model)) {
    throw new Error(`not a mistral model: ${model}`);
  }
  return `${MISTRAL_PREFIX}${model}`;
}

export function fromMistralService(model: MistralService) {
  if (!isMistralService(model)) {
    throw new Error(`not a mistral model: ${model}`);
  }
  return model.slice(MISTRAL_PREFIX.length);
}
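// Example (illustrative, with the hypothetical model name "llama3"):
//
//   toOllamaModel("llama3") === "ollama-llama3"
//   fromOllamaModel("ollama-llama3") === "llama3"
//   service2model_core("ollama-llama3") === "ollama-llama3"  // keeps prefix
//   service2model_core("openai-gpt-4o") === "gpt-4o"         // strips prefix
//
// Note that toOllamaModel throws if the prefix is already present, which
// guards against double-wrapping.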
type LLM2String = {
  [key in
    | (typeof USER_SELECTABLE_LANGUAGE_MODELS)[number]
    | "chatgpt" // some additional ones, backwards compatibility
    | "chatgpt3"
    | "chatgpt4"
    | "gpt-4-32k"
    | "text-bison-001"
    | "chat-bison-001"]: string;
};

// Map from pseudo account_id to what should be displayed to the user.
// This is used in various places in the frontend.
// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
export const LLM_USERNAMES: LLM2String = {
  chatgpt: "GPT-3.5",
  chatgpt3: "GPT-3.5",
  chatgpt4: "GPT-4",
  "gpt-4": "GPT-4",
  "gpt-4-32k": "GPT-4-32k",
  "gpt-3.5-turbo": "GPT-3.5",
  "gpt-3.5-turbo-16k": "GPT-3.5-16k",
  "gpt-4-turbo-preview": "GPT-4 Turbo 128k",
  "gpt-4-turbo-preview-8k": "GPT-4 Turbo",
  "gpt-4-turbo": "GPT-4 Turbo 128k",
  "gpt-4-turbo-8k": "GPT-4 Turbo",
  "gpt-4o": "GPT-4o 128k",
  "gpt-4o-8k": "GPT-4o",
  "gpt-4o-mini": "GPT-4o Mini 128k",
  "gpt-4o-mini-8k": "GPT-4o Mini",
  "gpt-4.1": "GPT-4.1",
  "gpt-4.1-mini": "GPT-4.1 Mini",
  "o1-mini-8k": "OpenAI o1-mini",
  "o1-8k": "OpenAI o1",
  "o1-mini": "OpenAI o1-mini",
  o1: "OpenAI o1",
  "text-embedding-ada-002": "Text Embedding Ada 002", // TODO: this is for embeddings, should be moved to a different place
  "text-bison-001": "PaLM 2",
  "chat-bison-001": "PaLM 2",
  "gemini-pro": "Gemini 1.0 Pro",
  "gemini-1.0-ultra": "Gemini 1.0 Ultra",
  "gemini-1.5-pro": "Gemini 1.5 Pro 1m",
  "gemini-1.5-pro-8k": "Gemini 1.5 Pro",
  "gemini-1.5-flash-8k": "Gemini 1.5 Flash",
  "gemini-2.0-flash-8k": "Gemini 2.0 Flash",
  "gemini-2.0-flash-lite-8k": "Gemini 2.0 Flash Lite",
  "mistral-small-latest": "Mistral AI Small",
  "mistral-medium-latest": "Mistral AI Medium",
  "mistral-large-latest": "Mistral AI Large",
  "claude-3-haiku": "Claude 3 Haiku 200k",
  "claude-3-haiku-8k": "Claude 3 Haiku",
  "claude-3-sonnet": "Claude 3 Sonnet 200k",
  "claude-3-sonnet-4k": "Claude 3 Sonnet",
  "claude-3-5-sonnet": "Claude 3.5 Sonnet 200k",
  "claude-3-5-sonnet-4k": "Claude 3.5 Sonnet",
  "claude-3-opus": "Claude 3 Opus 200k",
  "claude-3-opus-8k": "Claude 3 Opus",
} as const;
// similar to the above, we map to short user-visible description texts
// this comes next to the name, hence you do not have to mention the name
export const LLM_DESCR: LLM2String = {
  chatgpt: "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  chatgpt3: "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  chatgpt4:
    "Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4":
    "Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4.1":
    "Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)",
  "gpt-4-32k": "",
  "gpt-3.5-turbo": "Fast, great for everyday tasks. (OpenAI, 4k token context)",
  "gpt-3.5-turbo-16k": `Same as ${LLM_USERNAMES["gpt-3.5-turbo"]} but with larger 16k token context`,
  "gpt-4-turbo-preview-8k":
    "More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
  "gpt-4-turbo-preview": "Like GPT-4 Turbo, but with up to 128k token context",
  "gpt-4-turbo-8k":
    "Faster, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
  "gpt-4-turbo": "Like GPT-4 Turbo, but with up to 128k token context",
  "gpt-4o-8k":
    "Most powerful, fastest, and cheapest (OpenAI, 8k token context)",
  "gpt-4o": "Most powerful, fastest, and cheapest (OpenAI, 128k token context)",
  "gpt-4o-mini-8k":
    "Most cost-efficient small model (OpenAI, 8k token context)",
  "gpt-4.1-mini": "Most cost-efficient small model (OpenAI, 8k token context)",
  "gpt-4o-mini": "Most cost-efficient small model (OpenAI, 128k token context)",
  "text-embedding-ada-002": "Text embedding Ada 002 by OpenAI", // TODO: this is for embeddings, should be moved to a different place
  "o1-8k": "Spends more time thinking (8k token context)",
  "o1-mini-8k": "A cost-efficient reasoning model (8k token context)",
  o1: "Spends more time thinking (8k token context)",
  "o1-mini": "A cost-efficient reasoning model (8k token context)",
  "text-bison-001": "",
  "chat-bison-001": "",
  "gemini-pro":
    "Google's Gemini 1.0 Pro Generative AI model (30k token context)",
  "gemini-1.0-ultra":
    "Google's Gemini 1.0 Ultra Generative AI model (30k token context)",
  "gemini-1.5-pro":
    "Google's Gemini 1.5 Pro Generative AI model (1m token context)",
  "gemini-1.5-pro-8k":
    "Google's Gemini 1.5 Pro Generative AI model (8k token context)",
  "gemini-1.5-flash-8k":
    "Google's Gemini 1.5 Flash Generative AI model (8k token context)",
  "gemini-2.0-flash-8k":
    "Google's Gemini 2.0 Flash Generative AI model (8k token context)",
  "gemini-2.0-flash-lite-8k":
    "Google's Gemini 2.0 Flash Lite Generative AI model (8k token context)",
  "mistral-small-latest":
    "Fast, simple queries, short answers, less capabilities. (Mistral AI, 4k token context)",
  "mistral-medium-latest":
    "Intermediate tasks, summarizing, generating documents, etc. (Mistral AI, 4k token context)",
  "mistral-large-latest":
    "Most powerful, large reasoning capabilities, but slower. (Mistral AI, 4k token context)",
  "claude-3-haiku":
    "Fastest model, lightweight actions (Anthropic, 200k token context)",
  "claude-3-haiku-8k":
    "Fastest model, lightweight actions (Anthropic, 8k token context)",
  "claude-3-5-sonnet":
    "Our most intelligent model (Anthropic, 200k token context)",
  "claude-3-5-sonnet-4k":
    "Our most intelligent model (Anthropic, 4k token context)",
  "claude-3-sonnet":
    "Best combination of performance and speed (Anthropic, 200k token context)",
  "claude-3-sonnet-4k":
    "Best combination of performance and speed (Anthropic, 4k token context)",
  "claude-3-opus":
    "Excels at writing and complex tasks (Anthropic, 200k token context)",
  "claude-3-opus-8k":
    "Excels at writing and complex tasks (Anthropic, 8k token context)",
} as const;

export function isFreeModel(model: unknown, isCoCalcCom: boolean): boolean {
  if (!isCoCalcCom) return true;
  if (isUserDefinedModel(model)) return true;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (typeof model === "string" && LANGUAGE_MODELS.includes(model as any)) {
    // i.e. model is now of type LanguageModelCore, so we look up its cost entry
    const costInfo = LLM_COST[model];
    if (costInfo != null) {
      return costInfo.free;
    }
  }
  // all others are free (this should actually never happen, but we're cautious)
  return true;
}
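// Example (illustrative): on cocalc.com, the "free" flag of the cost table
// decides; everywhere else every model is free:
//
//   isFreeModel("gpt-4o-mini-8k", true) === true   // LLM_COST entry is free
//   isFreeModel("gpt-4", true) === false
//   isFreeModel("gpt-4", false) === true           // not cocalc.com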
// this is used in purchases/get-service-cost
// we only need to check for the vendor prefixes, no special cases!
export function isLanguageModelService(
  service: string,
): service is LanguageService {
  if (isUserDefinedModel(service)) return true;
  for (const v of LANGUAGE_MODEL_SERVICES) {
    if (service.startsWith(`${v}-`)) {
      return true;
    }
  }
  return false;
}

export function getLLMServiceStatusCheckMD(service: LLMServiceName): string {
  switch (service) {
    case "openai":
      return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`;
    case "google":
      return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`;
    case "ollama":
      return `No status information for Ollama available.`;
    case "custom_openai":
      return `No status information for Custom OpenAI available.`;
    case "mistralai":
      return `No status information for Mistral AI available.`;
    case "anthropic":
      return `Anthropic [status](https://status.anthropic.com/).`;
    case "user":
      return `No status information for user defined models available.`;
    default:
      unreachable(service);
  }
  return "";
}

interface Cost {
  prompt_tokens: number;
  completion_tokens: number;
  max_tokens: number;
  free: boolean; // whether this model has metered paid usage, or is offered for free
}

// price per token for a given price of USD per 1M tokens
function usd1Mtokens(usd: number): number {
  return usd / 1_000_000;
}
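// Example (illustrative): usd1Mtokens(30) === 0.00003, i.e. $30 per 1M tokens
// is $0.00003 per token, so a 1000 token prompt at that rate costs
// 1000 * 0.00003 = $0.03.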
// This is the official published cost that openai charges.
// It changes over time, so this will sometimes need to be updated.
// Our cost is a configurable multiple of this.
// https://openai.com/pricing#language-models
// There appears to be no api that provides the prices, unfortunately.
export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
  "gpt-4": {
    prompt_tokens: usd1Mtokens(30),
    completion_tokens: usd1Mtokens(60),
    max_tokens: 8192,
    free: false,
  },
  "gpt-4-32k": {
    prompt_tokens: usd1Mtokens(60),
    completion_tokens: usd1Mtokens(120),
    max_tokens: 32768,
    free: false,
  },
  "gpt-3.5-turbo": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 4096,
    free: true,
  },
  "gpt-3.5-turbo-16k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 16384,
    free: false,
  },
  // like above, but we limit the tokens to reduce how much money the user has to commit to
  "gpt-4-turbo-preview-8k": {
    prompt_tokens: usd1Mtokens(10),
    completion_tokens: usd1Mtokens(30),
    max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
    free: false,
  },
  "gpt-4-turbo-preview": {
    prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens
    completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  }, // like above, but we limit the tokens to reduce how much money the user has to commit to
  "gpt-4-turbo-8k": {
    prompt_tokens: usd1Mtokens(10),
    completion_tokens: usd1Mtokens(30),
    max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
    free: false,
  },
  "gpt-4-turbo": {
    prompt_tokens: usd1Mtokens(10), // $10.00 / 1M tokens
    completion_tokens: usd1Mtokens(30), // $30.00 / 1M tokens
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  },
  "gpt-4.1": {
    prompt_tokens: usd1Mtokens(2),
    completion_tokens: usd1Mtokens(8),
    max_tokens: 8192,
    free: false,
  },
  "gpt-4.1-mini": {
    prompt_tokens: usd1Mtokens(0.4),
    completion_tokens: usd1Mtokens(1.6),
    max_tokens: 8192,
    free: true,
  },
  "gpt-4o-8k": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "gpt-4o": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: false,
  },
  "gpt-4o-mini-8k": {
    prompt_tokens: usd1Mtokens(0.15),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: true,
  },
  "gpt-4o-mini": {
    prompt_tokens: usd1Mtokens(0.15),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
    free: true,
  },
  o1: {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(7.5),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "o1-mini": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(1.5),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "o1-8k": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(7.5),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  "o1-mini-8k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(1.5),
    max_tokens: 8192, // like gpt-4-turbo-8k
    free: false,
  },
  // also OpenAI
  "text-embedding-ada-002": {
    prompt_tokens: 0.0001 / 1000,
    completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings
    max_tokens: 8191,
    free: false,
  },
  // https://ai.google.dev/pricing
  "gemini-pro": {
    prompt_tokens: usd1Mtokens(0.5),
    completion_tokens: usd1Mtokens(1.5),
    max_tokens: 30720,
    free: true,
  },
  "gemini-1.5-pro-8k": {
    prompt_tokens: usd1Mtokens(1.25), // (we're below the 128k context)
    completion_tokens: usd1Mtokens(5),
    max_tokens: 8_000,
    free: false,
  },
  "gemini-1.5-pro": {
    prompt_tokens: usd1Mtokens(2.5),
    completion_tokens: usd1Mtokens(10),
    max_tokens: 1048576,
    free: false,
  },
  "gemini-1.0-ultra": {
    prompt_tokens: usd1Mtokens(1), // TODO: price not yet known!
    completion_tokens: usd1Mtokens(1),
    max_tokens: 30720,
    free: true,
  },
  "gemini-1.5-flash-8k": {
    prompt_tokens: usd1Mtokens(0.075),
    completion_tokens: usd1Mtokens(0.3),
    max_tokens: 8_000,
    free: true,
  },
  // https://ai.google.dev/gemini-api/docs/pricing?hl=de
  "gemini-2.0-flash-8k": {
    prompt_tokens: usd1Mtokens(0.1),
    completion_tokens: usd1Mtokens(0.4),
    max_tokens: 8_000,
    free: true,
  },
  "gemini-2.0-flash-lite-8k": {
    prompt_tokens: usd1Mtokens(0.075),
    completion_tokens: usd1Mtokens(0.3),
    max_tokens: 8_000,
    free: true,
  },
  // https://mistral.ai/technology/
  "mistral-small-latest": {
    prompt_tokens: usd1Mtokens(0.2),
    completion_tokens: usd1Mtokens(0.6),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  "mistral-medium-latest": {
    prompt_tokens: usd1Mtokens(2.7),
    completion_tokens: usd1Mtokens(8.1),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: true,
  },
  "mistral-large-latest": {
    prompt_tokens: usd1Mtokens(2),
    completion_tokens: usd1Mtokens(6),
    max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
    free: false,
  },
  // Anthropic: pricing is listed on this page: https://www.anthropic.com/api
  "claude-3-opus-8k": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 8_000, // limited to 8k tokens, to reduce the necessary spend limit to commit to
    free: false,
  },
  "claude-3-opus": {
    prompt_tokens: usd1Mtokens(15),
    completion_tokens: usd1Mtokens(75),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-5-sonnet": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-5-sonnet-4k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 4_000, // limited to 4k tokens
    free: false,
  },
  "claude-3-sonnet-4k": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 4_000, // limited to 4k tokens, offered for free
    free: true,
  },
  "claude-3-sonnet": {
    prompt_tokens: usd1Mtokens(3),
    completion_tokens: usd1Mtokens(15),
    max_tokens: 200_000,
    free: false,
  },
  "claude-3-haiku-8k": {
    prompt_tokens: usd1Mtokens(0.8),
    completion_tokens: usd1Mtokens(4),
    max_tokens: 8_000, // limited to 8k tokens, offered for free
    free: true,
  },
  "claude-3-haiku": {
    prompt_tokens: usd1Mtokens(0.8),
    completion_tokens: usd1Mtokens(4),
    max_tokens: 200_000,
    free: false,
  },
} as const;

// TODO: remove this test – it's only used server side, and that server side check should work for all known LLM models
export function isValidModel(model?: string): boolean {
  if (model == null) return false;
  if (isUserDefinedModel(model)) return true;
  if (isOllamaLLM(model)) return true;
  if (isCustomOpenAI(model)) return true;
  if (isMistralModel(model)) return true;
  if (isGoogleModel(model)) return true;
  return LLM_COST[model ?? ""] != null;
}

export function getMaxTokens(model?: LanguageModel): number {
  // TODO: store max tokens in the model object itself, this is just a fallback
  if (isOllamaLLM(model)) return 8192;
  if (isMistralModel(model)) return 4096; // TODO: check with MistralAI
  return LLM_COST[model ?? ""]?.max_tokens ?? 4096;
}
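// Example (illustrative): getMaxTokens falls back by model kind:
//
//   getMaxTokens("claude-3-opus") === 200000      // from LLM_COST
//   getMaxTokens("ollama-llama3") === 8192        // "llama3" is hypothetical
//   getMaxTokens(undefined) === 4096              // final fallback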
export interface LLMCost {
  prompt_tokens: number;
  completion_tokens: number;
}

export function getLLMCost(
  model: LanguageModelCore,
  markup_percentage: number, // a number like "30" means that we increase the wholesale price by multiplying by 1.3
): LLMCost {
  const x = LLM_COST[model];
  if (x == null) {
    throw Error(`unknown model "${model}"`);
  }
  const { prompt_tokens, completion_tokens } = x;
  if (markup_percentage < 0) {
    throw Error("markup percentage can't be negative");
  }
  const f = 1 + markup_percentage / 100;
  return {
    prompt_tokens: prompt_tokens * f,
    completion_tokens: completion_tokens * f,
  };
}

const priceRangeCache = new LRU<string, ReturnType<typeof getLLMPriceRange>>({
  max: 10,
});

export function getLLMPriceRange(
  prompt: number,
  output: number,
  markup_percentage: number,
): { min: number; max: number } {
  const cacheKey = `${prompt}::${output}::${markup_percentage}`;
  const cached = priceRangeCache.get(cacheKey);
  if (cached) return cached;

  let min = Infinity;
  let max = 0;
  for (const key in LLM_COST) {
    const model = LLM_COST[key];
    if (!model || isFreeModel(key, true)) continue;
    const { prompt_tokens, completion_tokens } = getLLMCost(
      key as LanguageModelCore,
      markup_percentage,
    );
    const p = prompt * prompt_tokens + output * completion_tokens;

    min = Math.min(min, p);
    max = Math.max(max, p);
  }
  const ret = { min, max };
  priceRangeCache.set(cacheKey, ret);
  return ret;
}

// The maximum cost of a single call using the given model.
// We can't know the cost until after it happens, so this bound is useful for
// ensuring the user can afford to make a call.
export function getMaxCost(
  model: LanguageModelCore,
  markup_percentage: number,
): number {
  const { prompt_tokens, completion_tokens } = getLLMCost(
    model,
    markup_percentage,
  );
  const { max_tokens } = LLM_COST[model];
  return Math.max(prompt_tokens, completion_tokens) * max_tokens;
}
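// Example (illustrative): with a 30% markup, "gpt-4o" costs
// getLLMCost("gpt-4o", 30).prompt_tokens === usd1Mtokens(2.5) * 1.3, i.e.
// $3.25 per 1M prompt tokens; and the worst-case bound for one "gpt-4" call is
// getMaxCost("gpt-4", 0) === (60 / 1_000_000) * 8192 ≈ $0.49.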
/**
 * Initially, we just had one system prompt for all LLMs.
 * This was tuned for the ChatGPTs by OpenAI, but breaks down for others.
 * For example, Gemini and Mistral are confused by mentioning "CoCalc" and insert code cells for all kinds of questions.
 */
export function getSystemPrompt(
  model: LanguageModel,
  _path: string | undefined,
) {
  // TODO: for now, path is ignored. We might want to use it to customize the prompt in the future.
  const common = "Be brief.";
  const math = "Enclose any math formulas in $.";

  if (
    model2vendor(model).name === "openai" ||
    model.startsWith(OPENAI_PREFIX)
  ) {
    const mdCode =
      "Include the language directly after the triple backticks in all markdown code blocks.";
    return `Assume full access to CoCalc and using CoCalc right now.\n${mdCode}\n${math}\n${common}`;
  }

  // mistral stupidly inserts anything mentioned in the prompt as examples, always.
  if (
    model2vendor(model).name === "mistralai" ||
    model.startsWith(MISTRAL_PREFIX)
  ) {
    return common;
  }

  if (
    model2vendor(model).name === "google" ||
    model.startsWith(GOOGLE_PREFIX)
  ) {
    return `${math}\n${common}`;
  }

  if (
    model2vendor(model).name === "ollama" ||
    model.startsWith(OLLAMA_PREFIX)
  ) {
    return `${common}`;
  }

  if (
    model2vendor(model).name === "anthropic" ||
    model.startsWith(ANTHROPIC_PREFIX)
  ) {
    return `${math}\n${common}`;
  }

  const mdCode = `Any code blocks in triple backticks should mention the language after the first backticks. For example \`\`\`python\nprint("Hello, World!")\n\`\`\``;
  return `${mdCode}\n${math}\n${common}`;
}
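// Example (illustrative): the vendor decides which fragments are combined:
//
//   getSystemPrompt("mistral-large-latest", undefined) === "Be brief."
//   getSystemPrompt("claude-3-haiku", undefined)
//     === "Enclose any math formulas in $.\nBe brief."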