Easy to use
Zero-config by default. Works in Node.js, Bun, and Electron. Bootstrap a project with a single command.
Learn more
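For instance, a new project can typically be scaffolded with the package's npm initializer:
npm create node-llama-cpp@latest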
Run AI models locally on your machine
Node.js bindings for llama.cpp, and much more
v3.0 is here! Experience the ease of running models on your machine
npx -y node-llama-cpp chat
To chat with models using a UI, try the example Electron app
Check out your hardware capabilities
npx -y node-llama-cpp inspect gpu
Everything you need to use large language models in your project
Integrate node-llama-cpp in your codebase and prompt models
import {fileURLToPath} from "url";
import path from "path";
import {getLlama, LlamaChatSession} from "node-llama-cpp";

const __dirname = path.dirname(
    fileURLToPath(import.meta.url)
);

// Load the llama.cpp bindings, then a local GGUF model
const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "my-model.gguf")
});

// Create a context and start a chat session on one of its sequences
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

const q1 = "Hi there, how are you?";
console.log("User: " + q1);

const a1 = await session.prompt(q1);
console.log("AI: " + a1);
Get an embedding for a given text
import {fileURLToPath} from "url";
import path from "path";
import {getLlama} from "node-llama-cpp";

const __dirname = path.dirname(
    fileURLToPath(import.meta.url)
);

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "my-model.gguf")
});

// Create a dedicated context for generating embeddings
const context = await model.createEmbeddingContext();

const text = "Hello world";
console.log("Text:", text);

const embedding = await context.getEmbeddingFor(text);
console.log("Embedding vector:", embedding.vector);
Force a model response to follow your JSON schema
import {fileURLToPath} from "url";
import path from "path";
import {getLlama, LlamaChatSession} from "node-llama-cpp";

const __dirname = path.dirname(
    fileURLToPath(import.meta.url)
);

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "my-model.gguf")
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

// Build a grammar from a JSON schema;
// it constrains the model's output to JSON that matches the schema
const grammar = await llama.createGrammarForJsonSchema({
    type: "object",
    properties: {
        positiveWordsInUserMessage: {
            type: "array",
            items: {
                type: "string"
            }
        },
        userMessagePositivityScoreFromOneToTen: {
            enum: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        },
        nameOfUser: {
            oneOf: [{
                type: "null"
            }, {
                type: "string"
            }]
        }
    }
});

const prompt = "Hi there! I'm John. Nice to meet you!";

const res = await session.prompt(prompt, {
    grammar
});

// Parse the response back into a typed object matching the schema
const parsedRes = grammar.parse(res);

console.log("User name:", parsedRes.nameOfUser);
console.log(
    "Positive words in user message:",
    parsedRes.positiveWordsInUserMessage
);
console.log(
    "User message positivity score:",
    parsedRes.userMessagePositivityScoreFromOneToTen
);
Let a model call functions to retrieve data or perform actions
import {fileURLToPath} from "url";
import path from "path";
import {
    getLlama,
    LlamaChatSession,
    defineChatSessionFunction
} from "node-llama-cpp";

const __dirname = path.dirname(
    fileURLToPath(import.meta.url)
);

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "my-model.gguf")
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

const fruitPrices: Record<string, string> = {
    "apple": "$6",
    "banana": "$4"
};

// Functions the model can call while generating a response
const functions = {
    getFruitPrice: defineChatSessionFunction({
        description: "Get the price of a fruit",
        params: {
            type: "object",
            properties: {
                name: {
                    type: "string"
                }
            }
        },
        async handler(params) {
            const name = params.name.toLowerCase();
            if (Object.keys(fruitPrices).includes(name))
                return {
                    name: name,
                    price: fruitPrices[name]
                };

            return `Unrecognized fruit "${params.name}"`;
        }
    })
};

const q1 = "Is an apple more expensive than a banana?";
console.log("User: " + q1);

const a1 = await session.prompt(q1, {functions});
console.log("AI: " + a1);