Codú
Petr Homola · 2 min read

A C API for Apple's on-device LLM

Apple provides a high-level Swift API for its on-device LLM. Sometimes it might be useful to have a C API, for example if you want to use the LLM from a low-level language.

The API might look as follows:

/*
 * Model handles.
 * language_model_create() returns an owned handle; release it with
 * model_destroy() once it is no longer needed.
 */
void* language_model_create(void);
bool model_is_available(void* model);
void model_destroy(void* model);

/*
 * Sessions.
 * language_session_create() returns an owned session handle; release it with
 * session_destroy().
 *
 * session_respond_to() is asynchronous: it returns immediately and invokes
 * `callback(response, error, info)` later. On success `response` is set and
 * `error` is NULL; on failure `response` is NULL and `error` is set. Both
 * strings are heap-allocated (strdup) and must be released by the callback
 * with free(). `info` is passed through to the callback unchanged.
 * `prompt` is only read, so it is declared const and string literals can be
 * passed directly.
 */
void* language_session_create(void* model);
void session_respond_to(void* session, const char* prompt,
                        void (*callback)(char* response, char* error, void* info),
                        void* info);
void session_destroy(void* session);

A simple C example using this API might be something like:

/*
 * Completion handler for session_respond_to().
 *
 * response: the model's answer, or NULL when the request failed.
 * error:    an error description, or NULL when the request succeeded.
 * info:     the dispatch semaphore main() is waiting on.
 *
 * Both strings are heap-allocated by the library (strdup), so this handler
 * owns them and releases them before returning.
 */
void callback(char* response, char* error, void* info) {
    /* One of the two strings is NULL; passing NULL to %s is undefined behavior. */
    printf("'%s' '%s'\n",
           response ? response : "(null)",
           error ? error : "(null)");

    /* free(NULL) is a no-op, so no need to check which one is set. */
    free(response);
    free(error);

    /* Wake up main(), which is blocked in dispatch_semaphore_wait(). */
    dispatch_semaphore_signal(info);
}

/*
 * Asks the on-device model a single question and prints the answer.
 * Returns 1 if the model is not available, 0 otherwise.
 */
int main(void) {
    void* model = language_model_create();
    bool modelIsAvailable = model_is_available(model);
    printf("LLM is available: %s\n", modelIsAvailable ? "yes" : "no");
    if (!modelIsAvailable) {
        /* Release the model handle on the early-exit path as well. */
        model_destroy(model);
        return 1;
    }

    void* session = language_session_create(model);

    /* session_respond_to() returns immediately; the semaphore lets us block
       until the callback has been invoked. */
    dispatch_semaphore_t sem = dispatch_semaphore_create(0);
    session_respond_to(session, "Tell me the capital of Greenland.", &callback, sem);
    dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);

    /* Tear everything down in reverse order of creation. */
    dispatch_release(sem);
    session_destroy(session);
    model_destroy(model);
    return 0;
}

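Note that since the API is asynchronous, we need a callback and a semaphore so we can wait for the response. The response and error strings passed to the callback are allocated with strdup, so the callback is responsible for releasing them with free.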

My implementation in Swift is as follows:

import Foundation
import FoundationModels

/// Creates a C handle for the system's default on-device language model.
///
/// - Returns: An opaque pointer that holds a +1 retain on
///   `SystemLanguageModel.default`. Balance it with `model_destroy`.
@_cdecl("language_model_create")
public func language_model_create() -> OpaquePointer {
    let retained = Unmanaged.passRetained(SystemLanguageModel.default)
    return OpaquePointer(retained.toOpaque())
}

/// Reports whether the model behind `model` is available.
///
/// - Parameter model: A handle obtained from `language_model_create`.
///   The handle's retain count is left untouched.
/// - Returns: The value of the model's `isAvailable` property.
@_cdecl("model_is_available")
public func model_is_available(_ model: OpaquePointer) -> CBool {
    let handle = Unmanaged<SystemLanguageModel>.fromOpaque(UnsafeRawPointer(model))
    return handle.takeUnretainedValue().isAvailable
}

/// Releases a model handle.
///
/// - Parameter model: A handle obtained from `language_model_create`.
///   It must not be used after this call.
@_cdecl("model_destroy")
public func model_destroy(_ model: OpaquePointer) {
    // Balances the retain taken by `passRetained` when the handle was created.
    Unmanaged<SystemLanguageModel>.fromOpaque(UnsafeRawPointer(model)).release()
}

/// Creates a C handle for a new `LanguageModelSession` bound to `model`.
///
/// - Parameter model: A handle obtained from `language_model_create`.
///   Its retain count is left untouched.
/// - Returns: An opaque pointer that holds a +1 retain on the new session.
///   Balance it with `session_destroy`.
@_cdecl("language_session_create")
public func language_session_create(_ model: OpaquePointer) -> OpaquePointer {
    let languageModel = Unmanaged<SystemLanguageModel>
        .fromOpaque(UnsafeRawPointer(model))
        .takeUnretainedValue()
    let retainedSession = Unmanaged.passRetained(LanguageModelSession(model: languageModel))
    return OpaquePointer(retainedSession.toOpaque())
}

/// Sends `prompt` to the session and reports the outcome through `callback`.
///
/// The function returns immediately. The request runs inside an unstructured
/// `Task`, so `callback` is invoked later and not necessarily on the calling
/// thread.
///
/// - Parameters:
///   - session: A handle obtained from `language_session_create`. The task keeps
///     its own strong reference to the session while the request is running.
///   - prompt: A NUL-terminated C string. It is copied into a Swift `String`
///     before the function returns.
///   - callback: Called once with `(response, error, info)`. On success
///     `response` is a `strdup`-allocated copy of the reply and `error` is `nil`;
///     on failure `response` is `nil` and `error` is a `strdup`-allocated copy of
///     the error's localized description. The receiver owns the strings and
///     should release them with `free`.
///   - info: Caller-defined context, forwarded to `callback` unchanged.
@_cdecl("session_respond_to")
public func session_respond_to(_ session: OpaquePointer, _ prompt: UnsafeMutablePointer<Int8>, _ callback: @convention(c) (UnsafeMutablePointer<Int8>?, UnsafeMutablePointer<Int8>?, OpaquePointer) -> Void, _ info: OpaquePointer) {
    let liveSession = Unmanaged<LanguageModelSession>
        .fromOpaque(UnsafeRawPointer(session))
        .takeUnretainedValue()
    let promptText = String(cString: prompt)

    Task {
        let reply: UnsafeMutablePointer<Int8>?
        let failure: UnsafeMutablePointer<Int8>?
        do {
            let answer = try await liveSession.respond(to: promptText)
            reply = strdup(answer.content)
            failure = nil
        } catch {
            reply = nil
            failure = strdup(error.localizedDescription)
        }
        // Single call site: one of the two strings is set, the other is nil.
        callback(reply, failure, info)
    }
}

/// Releases a session handle.
///
/// - Parameter session: A handle obtained from `language_session_create`.
///   It must not be used after this call.
@_cdecl("session_destroy")
public func session_destroy(_ session: OpaquePointer) {
    // Balances the retain taken by `passRetained` when the handle was created.
    Unmanaged<LanguageModelSession>.fromOpaque(UnsafeRawPointer(session)).release()
}

This code can be compiled into a dynamic library using the following command:

swiftc -emit-library llm.swift

The C code can be compiled using the following command:

clang -L. -lllm llm.c

The result is a binary named a.out that uses the on-device LLM built into macOS.

(The code can be found here.)

LlmSwiftFfi
Petr Homola@petr-homola-dub

Studied physics & CS; PhD in NLP; interested in AI, HPC & PLT

Loading

Loading discussion...

Hey! 👋

Got something to say?

or to leave a comment.