Skip to content

Class: Llama

Defined in: bindings/Llama.ts:32

Properties

onDispose

ts
readonly onDispose: EventRelay<void>;

Defined in: bindings/Llama.ts:67

Accessors

disposed

Get Signature

ts
get disposed(): boolean

Defined in: bindings/Llama.ts:159

Returns

boolean


classes

Get Signature

ts
get classes(): LlamaClasses

Defined in: bindings/Llama.ts:163

Returns

LlamaClasses


gpu

Get Signature

ts
get gpu(): LlamaGpuType

Defined in: bindings/Llama.ts:170

Returns

LlamaGpuType


supportsGpuOffloading

Get Signature

ts
get supportsGpuOffloading(): boolean

Defined in: bindings/Llama.ts:174

Returns

boolean


supportsMmap

Get Signature

ts
get supportsMmap(): boolean

Defined in: bindings/Llama.ts:178

Returns

boolean


gpuSupportsMmap

Get Signature

ts
get gpuSupportsMmap(): boolean

Defined in: bindings/Llama.ts:182

Returns

boolean


supportsMlock

Get Signature

ts
get supportsMlock(): boolean

Defined in: bindings/Llama.ts:186

Returns

boolean


cpuMathCores

Get Signature

ts
get cpuMathCores(): number

Defined in: bindings/Llama.ts:191

The number of CPU cores that are useful for math.

Returns

number


maxThreads

Get Signature

ts
get maxThreads(): number

Defined in: bindings/Llama.ts:202

The maximum number of threads that can be used by the Llama instance.

If set to 0, the Llama instance will have no limit on the number of threads.

See the maxThreads option of getLlama for more information.

Returns

number

Set Signature

ts
set maxThreads(value: number): void

Defined in: bindings/Llama.ts:206

Parameters

| Parameter | Type |
| --- | --- |
| value | number |

Returns

void


logLevel

Get Signature

ts
get logLevel(): LlamaLogLevel

Defined in: bindings/Llama.ts:210

Returns

LlamaLogLevel

Set Signature

ts
set logLevel(value: LlamaLogLevel): void

Defined in: bindings/Llama.ts:214

Parameters

| Parameter | Type |
| --- | --- |
| value | LlamaLogLevel |

Returns

void


logger

Get Signature

ts
get logger(): (level: LlamaLogLevel, message: string) => void

Defined in: bindings/Llama.ts:224

Returns

Function

Parameters

| Parameter | Type |
| --- | --- |
| level | LlamaLogLevel |
| message | string |

Returns

void

Set Signature

ts
set logger(value: (level: LlamaLogLevel, message: string) => void): void

Defined in: bindings/Llama.ts:228

Parameters

| Parameter | Type |
| --- | --- |
| value | (level: LlamaLogLevel, message: string) => void |

Returns

void


buildType

Get Signature

ts
get buildType(): "localBuild" | "prebuilt"

Defined in: bindings/Llama.ts:235

Returns

"localBuild" | "prebuilt"


cmakeOptions

Get Signature

ts
get cmakeOptions(): Readonly<Record<string, string>>

Defined in: bindings/Llama.ts:239

Returns

Readonly<Record<string, string>>


llamaCppRelease

Get Signature

ts
get llamaCppRelease(): {
  repo: string;
  release: string;
}

Defined in: bindings/Llama.ts:243

Returns
ts
{
  repo: string;
  release: string;
}
repo
ts
readonly repo: string;
release
ts
readonly release: string;

systemInfo

Get Signature

ts
get systemInfo(): string

Defined in: bindings/Llama.ts:247

Returns

string


vramPaddingSize

Get Signature

ts
get vramPaddingSize(): number

Defined in: bindings/Llama.ts:259

VRAM padding used for memory size calculations, as these calculations are not always accurate. This is set by default to ensure stability, but can be configured when you call getLlama.

See vramPadding on getLlama for more information.

Returns

number

Methods

dispose()

ts
dispose(): Promise<void>

Defined in: bindings/Llama.ts:144

Returns

Promise<void>


getVramState()

ts
getVramState(): Promise<{
  total: number;
  used: number;
  free: number;
  unifiedSize: number;
 }>

Defined in: bindings/Llama.ts:269

The total amount of VRAM that is currently being used.

unifiedSize represents the amount of VRAM that is shared between the CPU and GPU. On SoC devices, this is usually the same as total.

Returns

Promise<{ total: number; used: number; free: number; unifiedSize: number; }>


getSwapState()

ts
getSwapState(): Promise<{
  maxSize: number;
  allocated: number;
  used: number;
 }>

Defined in: bindings/Llama.ts:294

Get the state of the swap memory.

maxSize - The maximum size of the swap memory that the system can allocate. If the swap size is dynamic (like on macOS), this will be Infinity.

allocated - The total size allocated by the system for swap memory.

used - The amount of swap memory that is currently being used from the allocated size.

On Windows, this will return the info for the page file.

Returns

Promise<{ maxSize: number; allocated: number; used: number; }>


getGpuDeviceNames()

ts
getGpuDeviceNames(): Promise<string[]>

Defined in: bindings/Llama.ts:320

Returns

Promise<string[]>


loadModel()

ts
loadModel(options: LlamaModelOptions): Promise<LlamaModel>

Defined in: bindings/Llama.ts:328

Parameters

| Parameter | Type |
| --- | --- |
| options | LlamaModelOptions |

Returns

Promise<LlamaModel>


createGrammarForJsonSchema()

ts
createGrammarForJsonSchema<T>(schema: Readonly<T>): Promise<LlamaJsonSchemaGrammar<T>>

Defined in: bindings/Llama.ts:348

Type Parameters

Type Parameter
T extends GbnfJsonSchema

Parameters

| Parameter | Type |
| --- | --- |
| schema | Readonly<T> |

Returns

Promise<LlamaJsonSchemaGrammar<T>>

See


getGrammarFor()

ts
getGrammarFor(type: 
  | "json"
  | "json_arr"
  | "english"
  | "list"
  | "c"
  | "arithmetic"
  | "japanese"
  | "chess"): Promise<LlamaGrammar>

Defined in: bindings/Llama.ts:353

Parameters

| Parameter | Type |
| --- | --- |
| type | "json" \| "json_arr" \| "english" \| "list" \| "c" \| "arithmetic" \| "japanese" \| "chess" |

Returns

Promise<LlamaGrammar>


createGrammar()

ts
createGrammar(options: LlamaGrammarOptions): Promise<LlamaGrammar>

Defined in: bindings/Llama.ts:360

Parameters

| Parameter | Type |
| --- | --- |
| options | LlamaGrammarOptions |

Returns

Promise<LlamaGrammar>

See

Using Grammar tutorial


defaultConsoleLogger()

ts
static defaultConsoleLogger(level: LlamaLogLevel, message: string): void

Defined in: bindings/Llama.ts:564

Parameters

| Parameter | Type |
| --- | --- |
| level | LlamaLogLevel |
| message | string |

Returns

void