# Class: Llama

Defined in: bindings/Llama.ts:32
## Properties

### onDispose

```ts
readonly onDispose: EventRelay<void>;
```

Defined in: bindings/Llama.ts:67
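As a hedged illustration, the sketch below subscribes to the dispose event. It assumes `EventRelay` exposes a `createListener` method (per the `lifecycle-utils` package) and that `llama` is an instance obtained from `getLlama()`:

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();

// Assumption: EventRelay (from "lifecycle-utils") exposes createListener()
const listener = llama.onDispose.createListener(() => {
    console.log("Llama instance was disposed");
});

// Later, unsubscribe if the notification is no longer needed
listener.dispose();
```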
## Accessors

### disposed

#### Get Signature

```ts
get disposed(): boolean
```

Defined in: bindings/Llama.ts:159

##### Returns

`boolean`
### classes

#### Get Signature

```ts
get classes(): LlamaClasses
```

Defined in: bindings/Llama.ts:163

##### Returns

`LlamaClasses`
### gpu

#### Get Signature

```ts
get gpu(): LlamaGpuType
```

Defined in: bindings/Llama.ts:170

##### Returns

`LlamaGpuType`
### supportsGpuOffloading

#### Get Signature

```ts
get supportsGpuOffloading(): boolean
```

Defined in: bindings/Llama.ts:174

##### Returns

`boolean`

### supportsMmap

#### Get Signature

```ts
get supportsMmap(): boolean
```

Defined in: bindings/Llama.ts:178

##### Returns

`boolean`

### gpuSupportsMmap

#### Get Signature

```ts
get gpuSupportsMmap(): boolean
```

Defined in: bindings/Llama.ts:182

##### Returns

`boolean`

### supportsMlock

#### Get Signature

```ts
get supportsMlock(): boolean
```

Defined in: bindings/Llama.ts:186

##### Returns

`boolean`
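Taken together, these flags describe what the loaded binding can do. A minimal sketch (assuming an instance obtained from `getLlama()`) that logs the capability surface:

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();

console.log("GPU type:", llama.gpu); // e.g. "metal", "cuda", "vulkan", or false
console.log("GPU offloading supported:", llama.supportsGpuOffloading);
console.log("mmap supported:", llama.supportsMmap);
console.log("GPU mmap supported:", llama.gpuSupportsMmap);
console.log("mlock supported:", llama.supportsMlock);
```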
### cpuMathCores

#### Get Signature

```ts
get cpuMathCores(): number
```

Defined in: bindings/Llama.ts:191

The number of CPU cores that are useful for math.

##### Returns

`number`
### maxThreads

#### Get Signature

```ts
get maxThreads(): number
```

Defined in: bindings/Llama.ts:202

The maximum number of threads that can be used by the Llama instance.

If set to `0`, the Llama instance will have no limit on the number of threads.

See the `maxThreads` option of `getLlama` for more information.

##### Returns

`number`
#### Set Signature

```ts
set maxThreads(value: number): void
```

Defined in: bindings/Llama.ts:206

##### Parameters

| Parameter | Type |
| --- | --- |
| `value` | `number` |

##### Returns

`void`
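A brief, hedged sketch: per the description above, the limit can be set at creation time via the `maxThreads` option of `getLlama`, or adjusted later through this setter (here also combined with the `cpuMathCores` accessor documented above):

```ts
import {getLlama} from "node-llama-cpp";

// Set an initial limit when creating the instance
const llama = await getLlama({maxThreads: 4});

// Cap threads at the number of math-capable cores
llama.maxThreads = llama.cpuMathCores;

// A value of 0 removes the limit entirely
llama.maxThreads = 0;
```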
### logLevel

#### Get Signature

```ts
get logLevel(): LlamaLogLevel
```

Defined in: bindings/Llama.ts:210

##### Returns

`LlamaLogLevel`

#### Set Signature

```ts
set logLevel(value: LlamaLogLevel): void
```

Defined in: bindings/Llama.ts:214

##### Parameters

| Parameter | Type |
| --- | --- |
| `value` | `LlamaLogLevel` |

##### Returns

`void`
### logger

#### Get Signature

```ts
get logger(): (level: LlamaLogLevel, message: string) => void
```

Defined in: bindings/Llama.ts:224

##### Returns

`Function`

###### Parameters

| Parameter | Type |
| --- | --- |
| `level` | `LlamaLogLevel` |
| `message` | `string` |

###### Returns

`void`

#### Set Signature

```ts
set logger(value: (level: LlamaLogLevel, message: string) => void): void
```

Defined in: bindings/Llama.ts:228

##### Parameters

| Parameter | Type |
| --- | --- |
| `value` | `(level: LlamaLogLevel, message: string) => void` |

##### Returns

`void`
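A hedged sketch of routing llama.cpp log output through a custom callback. It assumes `LlamaLogLevel` is exported as a value from the package entry point and exposes a `warn` member:

```ts
import {getLlama, LlamaLogLevel} from "node-llama-cpp";

const llama = await getLlama();

// Assumption: LlamaLogLevel exposes a "warn" member
llama.logLevel = LlamaLogLevel.warn;

// Route the remaining messages through a custom sink
llama.logger = (level: LlamaLogLevel, message: string) => {
    console.error(`[llama.cpp] [${level}] ${message}`);
};
```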
### buildType

#### Get Signature

```ts
get buildType(): "localBuild" | "prebuilt"
```

Defined in: bindings/Llama.ts:235

##### Returns

`"localBuild"` \| `"prebuilt"`
### cmakeOptions

#### Get Signature

```ts
get cmakeOptions(): Readonly<Record<string, string>>
```

Defined in: bindings/Llama.ts:239

##### Returns

`Readonly<Record<string, string>>`
### llamaCppRelease

#### Get Signature

```ts
get llamaCppRelease(): {
    repo: string;
    release: string;
}
```

Defined in: bindings/Llama.ts:243

##### Returns

```ts
{
    repo: string;
    release: string;
}
```

###### repo

```ts
readonly repo: string;
```

###### release

```ts
readonly release: string;
```
### systemInfo

#### Get Signature

```ts
get systemInfo(): string
```

Defined in: bindings/Llama.ts:247

##### Returns

`string`
### vramPaddingSize

#### Get Signature

```ts
get vramPaddingSize(): number
```

Defined in: bindings/Llama.ts:259

VRAM padding used for memory size calculations, as these calculations are not always accurate. This is set by default to ensure stability, but can be configured when you call `getLlama`.

See the `vramPadding` option of `getLlama` for more information.

##### Returns

`number`
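A hedged sketch of overriding the padding at creation time, assuming the `vramPadding` option of `getLlama` referenced above accepts a fixed size:

```ts
import {getLlama} from "node-llama-cpp";

// Assumption: vramPadding accepts a fixed padding size
const llama = await getLlama({
    vramPadding: 64 * 1024 ** 2 // example value: 64MiB
});

console.log("VRAM padding size:", llama.vramPaddingSize);
```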
## Methods

### dispose()

```ts
dispose(): Promise<void>
```

Defined in: bindings/Llama.ts:144

#### Returns

`Promise<void>`
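A short sketch of cleaning up when the instance is no longer needed; the `disposed` accessor documented above reflects the result:

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();

// ... use the instance ...

await llama.dispose();
console.log(llama.disposed); // true
```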
### getVramState()

```ts
getVramState(): Promise<{
    total: number;
    used: number;
    free: number;
    unifiedSize: number;
}>
```

Defined in: bindings/Llama.ts:269

The total amount of VRAM that is currently being used.

`unifiedSize` represents the amount of VRAM that is shared between the CPU and GPU. On SoC devices, this is usually the same as `total`.

#### Returns

`Promise<{ total: number; used: number; free: number; unifiedSize: number }>`
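For illustration, a hedged sketch that checks available VRAM before loading a large model. The values are assumed here to be in bytes, and the threshold is an arbitrary example:

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();
const vram = await llama.getVramState();

console.log(`VRAM: ${vram.used} / ${vram.total} used, ${vram.free} free`);
console.log(`Unified memory size: ${vram.unifiedSize}`);

// Arbitrary example threshold: warn if less than ~4GiB appears free
if (vram.free < 4 * 1024 ** 3)
    console.warn("Low free VRAM; consider offloading fewer layers to the GPU");
```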
### getSwapState()

```ts
getSwapState(): Promise<{
    maxSize: number;
    allocated: number;
    used: number;
}>
```

Defined in: bindings/Llama.ts:294

Get the state of the swap memory.

- `maxSize` - The maximum size of the swap memory that the system can allocate. If the swap size is dynamic (like on macOS), this will be `Infinity`.
- `allocated` - The total size allocated by the system for swap memory.
- `used` - The amount of swap memory that is currently being used from the `allocated` size.

On Windows, this will return the info for the page file.

#### Returns

`Promise<{ maxSize: number; allocated: number; used: number }>`
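A brief, hedged sketch of reading the swap state, accounting for the dynamic-swap case described above:

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();
const swap = await llama.getSwapState();

// maxSize can be Infinity when the OS grows swap dynamically (e.g. macOS)
const max = swap.maxSize === Infinity ? "dynamic" : String(swap.maxSize);
console.log(`Swap: ${swap.used} used of ${swap.allocated} allocated (max: ${max})`);
```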
### getGpuDeviceNames()

```ts
getGpuDeviceNames(): Promise<string[]>
```

Defined in: bindings/Llama.ts:320

#### Returns

`Promise<string[]>`
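A minimal usage sketch:

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();

const deviceNames = await llama.getGpuDeviceNames();
console.log("GPU devices:", deviceNames.join(", "));
```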
### loadModel()

```ts
loadModel(options: LlamaModelOptions): Promise<LlamaModel>
```

Defined in: bindings/Llama.ts:328

#### Parameters

| Parameter | Type |
| --- | --- |
| `options` | `LlamaModelOptions` |

#### Returns

`Promise<LlamaModel>`
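A minimal, hedged sketch of loading a model. The file path is a placeholder, and `modelPath` is assumed to be the relevant `LlamaModelOptions` field:

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();

// The path below is a placeholder; point it at a local GGUF file
const model = await llama.loadModel({
    modelPath: "path/to/model.gguf"
});

console.log("Model loaded");
```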
### createGrammarForJsonSchema()

```ts
createGrammarForJsonSchema<T>(schema: Readonly<T>): Promise<LlamaJsonSchemaGrammar<T>>
```

Defined in: bindings/Llama.ts:348

#### Type Parameters

| Type Parameter |
| --- |
| `T` extends `GbnfJsonSchema` |

#### Parameters

| Parameter | Type |
| --- | --- |
| `schema` | `Readonly<T>` |

#### Returns

`Promise<LlamaJsonSchemaGrammar<T>>`
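As a hedged illustration of constraining output with a JSON schema grammar: the schema shape follows `GbnfJsonSchema` conventions, while the surrounding session flow (`LlamaChatSession`, `createContext`, `getSequence`) is drawn from the wider library rather than this page, and the model path is a placeholder:

```ts
import {getLlama, LlamaChatSession} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({modelPath: "path/to/model.gguf"}); // placeholder
const context = await model.createContext();
const session = new LlamaChatSession({contextSequence: context.getSequence()});

const grammar = await llama.createGrammarForJsonSchema({
    type: "object",
    properties: {
        answer: {type: "string"},
        confidence: {type: "number"}
    }
} as const);

const response = await session.prompt("How tall is Mount Everest?", {grammar});

// parse() returns an object typed according to the schema
const parsed = grammar.parse(response);
console.log(parsed.answer, parsed.confidence);
```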
### getGrammarFor()

```ts
getGrammarFor(type: 
    | "json"
    | "json_arr"
    | "english"
    | "list"
    | "c"
    | "arithmetic"
    | "japanese"
    | "chess"): Promise<LlamaGrammar>
```

Defined in: bindings/Llama.ts:353

#### Parameters

| Parameter | Type |
| --- | --- |
| `type` | `"json" \| "json_arr" \| "english" \| "list" \| "c" \| "arithmetic" \| "japanese" \| "chess"` |

#### Returns

`Promise<LlamaGrammar>`
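A brief sketch of fetching one of the builtin grammars listed above:

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();

// Fetch one of the builtin grammars
const jsonGrammar = await llama.getGrammarFor("json");

// It can then be passed wherever a grammar is accepted,
// e.g. the {grammar} prompt option shown in the earlier sketch
```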
### createGrammar()

```ts
createGrammar(options: LlamaGrammarOptions): Promise<LlamaGrammar>
```

Defined in: bindings/Llama.ts:360

#### Parameters

| Parameter | Type |
| --- | --- |
| `options` | `LlamaGrammarOptions` |

#### Returns

`Promise<LlamaGrammar>`

#### See

Using Grammar tutorial
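A hedged sketch of building a grammar from raw GBNF text; `grammar` is assumed to be the `LlamaGrammarOptions` field that accepts the GBNF source:

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();

// A tiny GBNF grammar that only allows "yes" or "no"
const yesNoGrammar = await llama.createGrammar({
    grammar: 'root ::= "yes" | "no"'
});
```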
### defaultConsoleLogger()

```ts
static defaultConsoleLogger(level: LlamaLogLevel, message: string): void
```

Defined in: bindings/Llama.ts:564

#### Parameters

| Parameter | Type |
| --- | --- |
| `level` | `LlamaLogLevel` |
| `message` | `string` |

#### Returns

`void`
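As a final hedged sketch, this static method matches the shape of the `logger` accessor, so it can restore default console logging after a custom logger was set. This assumes `Llama` is exported as a value from the package entry point:

```ts
import {getLlama, Llama} from "node-llama-cpp";

const llama = await getLlama();

// Temporarily silence logs with a no-op logger...
llama.logger = () => {};

// ...then restore the default console behavior
// (assumption: Llama is a value export, making the static reachable)
llama.logger = Llama.defaultConsoleLogger;
```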