Easy to use
Zero-config by default. Works in Node.js, Bun, and Electron. Bootstrap a project with a single command.
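That command looks roughly like this (a sketch assuming the CLI's init subcommand; the exact invocation may vary between versions):

npx -y node-llama-cpp init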
Node.js bindings for llama.cpp, and much more
gpt-oss is here!
Experience the ease of running models on your machine
npx -y node-llama-cpp chat

To chat with models using a UI, try the example Electron app.
Check out your hardware capabilities
npx -y node-llama-cpp inspect gpu

Everything you need to use large language models in your project
Integrate node-llama-cpp in your codebase and prompt models
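To add node-llama-cpp to an existing project, install it from npm as usual (shown here with npm; any Node.js package manager should work):

npm install node-llama-cpp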
import {fileURLToPath} from "url";
import path from "path";
import {getLlama, LlamaChatSession} from "node-llama-cpp";
const __dirname = path.dirname(
    fileURLToPath(import.meta.url)
);
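// getLlama loads the llama.cpp bindings and automatically picks the best
// available compute backend for this machine (such as Metal, CUDA, or Vulkan),
// falling back to CPU when no GPU support is found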
const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "my-model.gguf")
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});
const q1 = "Hi there, how are you?";
console.log("User: " + q1);
const a1 = await session.prompt(q1);
console.log("AI: " + a1);Get embedding for a given text
import {fileURLToPath} from "url";
import path from "path";
import {getLlama} from "node-llama-cpp";
const __dirname = path.dirname(
    fileURLToPath(import.meta.url)
);
const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "my-model.gguf")
});
const context = await model.createEmbeddingContext();
const text = "Hello world";
console.log("Text:", text);
const embedding = await context.getEmbeddingFor(text);
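// the returned embedding exposes the raw vector for the text; such vectors can be
// compared against each other (for example with cosine similarity) to find related texts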
console.log("Embedding vector:", embedding.vector);Force a model response to follow your JSON schema
import {fileURLToPath} from "url";
import path from "path";
import {getLlama, LlamaChatSession} from "node-llama-cpp";
const __dirname = path.dirname(
    fileURLToPath(import.meta.url)
);
const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "my-model.gguf")
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});
const grammar = await llama.createGrammarForJsonSchema({
    type: "object",
    properties: {
        positiveWordsInUserMessage: {
            type: "array",
            items: {
                type: "string"
            }
        },
        userMessagePositivityScoreFromOneToTen: {
            enum: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        },
        nameOfUser: {
            oneOf: [{
                type: "null"
            }, {
                type: "string"
            }]
        }
    }
});
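// the grammar constrains token sampling, so the model's response is forced to
// match the JSON schema above and can be parsed into a typed object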
const prompt = "Hi there! I'm John. Nice to meet you!";
const res = await session.prompt(prompt, {
    grammar
});
const parsedRes = grammar.parse(res);
console.log("User name:", parsedRes.nameOfUser);
console.log(
    "Positive words in user message:", 
    parsedRes.positiveWordsInUserMessage
);
console.log(
    "User message positivity score:",
    parsedRes.userMessagePositivityScoreFromOneToTen
);

Let a model call functions to retrieve data or perform actions
import {fileURLToPath} from "url";
import path from "path";
import {
    getLlama,
    LlamaChatSession,
    defineChatSessionFunction
} from "node-llama-cpp";
const __dirname = path.dirname(
    fileURLToPath(import.meta.url)
);
const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "my-model.gguf")
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});
const fruitPrices: Record<string, string> = {
    "apple": "$6",
    "banana": "$4"
};
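// each function is defined with a description and a JSON schema for its parameters;
// the model can decide to call it while answering, and the value returned by the
// handler is passed back to the model to use in its response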
const functions = {
    getFruitPrice: defineChatSessionFunction({
        description: "Get the price of a fruit",
        params: {
            type: "object",
            properties: {
                name: {
                    type: "string"
                }
            }
        },
        async handler(params) {
            const name = params.name.toLowerCase();
            if (Object.keys(fruitPrices).includes(name))
                return {
                    name: name,
                    price: fruitPrices[name]
                };
            return `Unrecognized fruit "${params.name}"`;
        }
    })
};
const q1 = "Is an apple more expensive than a banana?";
console.log("User: " + q1);
const a1 = await session.prompt(q1, {functions});
console.log("AI: " + a1);