A C API for Apple's on-device LLM
Apple provides a high-level Swift API for its on-device LLM (the FoundationModels framework). Sometimes it can be useful to have a C API as well, for example if you want to use the LLM from a low-level language.
The API might look as follows:
void* language_model_create();
bool model_is_available(void*);
void model_destroy(void*);
void* language_session_create(void*);
void session_respond_to(void*, char*, void (*)(char*, char*, void*), void*);
void session_destroy(void*);
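On the C side, these declarations can live in a small header that the example below includes. A minimal sketch, assuming the file is called llm.h (the file name and the parameter names are my own choice, not part of the original API listing):
// llm.h -- C interface to the Swift wrapper (file name assumed)
#ifndef LLM_H
#define LLM_H

#include <stdbool.h>

// Opaque handles wrap the Swift SystemLanguageModel and LanguageModelSession.
void* language_model_create(void);
bool model_is_available(void* model);
void model_destroy(void* model);

void* language_session_create(void* model);
// Asynchronous: the callback receives either a response or an error (the other
// argument is NULL) together with the user-supplied info pointer. The strings
// are heap-allocated and owned by the caller.
void session_respond_to(void* session,
                        char* prompt,
                        void (*callback)(char* response, char* error, void* info),
                        void* info);
void session_destroy(void* session);

#endif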
A simple C program using this API might look like this:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <dispatch/dispatch.h>
#include "llm.h" // the declarations shown above

// Called from the Swift side once the model has produced a response (or failed).
void callback(char* response, char* error, void* info) {
    printf("'%s' '%s'\n", response ? response : "", error ? error : "");
    free(response); // the Swift side strdup's both strings, so the C side owns them
    free(error);
    dispatch_semaphore_signal((dispatch_semaphore_t)info);
}

int main(void) {
    void* model = language_model_create();
    bool modelIsAvailable = model_is_available(model);
    printf("LLM is available: %s\n", modelIsAvailable ? "yes" : "no");
    if (!modelIsAvailable) exit(1);

    void* session = language_session_create(model);
    dispatch_semaphore_t sem = dispatch_semaphore_create(0);
    session_respond_to(session, "Tell me the capital of Greenland.", &callback, sem);
    dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
    dispatch_release(sem);

    session_destroy(session);
    model_destroy(model);
}
Note that since the API is asynchronous, we need a callback and a semaphore so we can wait for the response.
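If blocking the calling thread is acceptable, the callback-and-semaphore plumbing can also be hidden behind a small synchronous helper. Here is a sketch that reuses the includes from the example above; the helper respond_sync and its context struct are my own additions, not part of the API:
typedef struct {
    char* response;            // set by the callback; owned by the caller (free() it)
    char* error;               // NULL on success
    dispatch_semaphore_t done;
} sync_ctx;

static void sync_callback(char* response, char* error, void* info) {
    sync_ctx* ctx = (sync_ctx*)info;
    ctx->response = response;
    ctx->error = error;
    dispatch_semaphore_signal(ctx->done);
}

// Blocks until the model has answered; returns the response and, on failure,
// stores an error message in *error_out.
char* respond_sync(void* session, char* prompt, char** error_out) {
    sync_ctx ctx = { NULL, NULL, dispatch_semaphore_create(0) };
    session_respond_to(session, prompt, &sync_callback, &ctx);
    dispatch_semaphore_wait(ctx.done, DISPATCH_TIME_FOREVER);
    dispatch_release(ctx.done);
    if (error_out) *error_out = ctx.error;
    else free(ctx.error);
    return ctx.response;
}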
My implementation in Swift is as follows:
import Foundation
import FoundationModels
@_cdecl("language_model_create")
public func language_model_create() -> OpaquePointer {
let model = SystemLanguageModel.default
return OpaquePointer(Unmanaged.passRetained(model).toOpaque())
}
@_cdecl("model_is_available")
public func model_is_available(_ model: OpaquePointer) -> CBool {
let model = Unmanaged<SystemLanguageModel>.fromOpaque(UnsafeRawPointer(model)).takeUnretainedValue()
return model.isAvailable
}
@_cdecl("model_destroy")
public func model_destroy(_ model: OpaquePointer) {
_ = Unmanaged<SystemLanguageModel>.fromOpaque(UnsafeRawPointer(model)).takeRetainedValue()
}
@_cdecl("language_session_create")
public func language_session_create(_ model: OpaquePointer) -> OpaquePointer {
let model = Unmanaged<SystemLanguageModel>.fromOpaque(UnsafeRawPointer(model)).takeUnretainedValue()
let session = LanguageModelSession(model: model)
return OpaquePointer(Unmanaged.passRetained(session).toOpaque())
}
@_cdecl("session_respond_to")
public func session_respond_to(_ session: OpaquePointer, _ prompt: UnsafeMutablePointer<Int8>, _ callback: @convention(c) (UnsafeMutablePointer<Int8>?, UnsafeMutablePointer<Int8>?, OpaquePointer) -> Void, _ info: OpaquePointer) {
let session = Unmanaged<LanguageModelSession>.fromOpaque(UnsafeRawPointer(session)).takeUnretainedValue()
let prompt = String(cString: prompt)
Task {
do {
let response = try await session.respond(to: prompt)
callback(strdup(response.content), nil, info)
} catch {
callback(nil, strdup(error.localizedDescription), info)
}
}
}
@_cdecl("session_destroy")
public func session_destroy(_ session: OpaquePointer) {
_ = Unmanaged<LanguageModelSession>.fromOpaque(UnsafeRawPointer(session)).takeRetainedValue()
}
This Swift code can be compiled into a dynamic library (libllm.dylib) using the following command:
swiftc -emit-library llm.swift
The C code can then be compiled and linked against this library using the following command:
clang -L. -lllm llm.c
The result is a binary named a.out that uses the on-device LLM built into macOS.
(The code can be found here.)
Studied physics & CS; PhD in NLP; interested in AI, HPC & PLT
Loading discussion...
Hey! 👋
Got something to say?
or to leave a comment.