Running LLMs on AOS (Highly Experimental)
You can run LLMs on top of AOS by using the Llama module (llama.wasm, shipped with the wao package): upload a GGUF model to Arweave, attest it so your process can read it via WeaveDrive, then load and prompt it from Lua handlers.
First create a test project.
npx wao create llm && cd llm
Create a directory and download one of the tiny models from Hugging Face.
We will try TinyLlama-1.1B-Chat-v1.0-GGUF for this tutorial.
mkdir test/models
curl -L -o test/models/tinyllama.gguf "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q2_K.gguf?download=true"
Write tests with WAO.
import { resolve } from "path"
import { readFileSync } from "fs"
import { describe, it } from "node:test"
import { AO, acc } from "wao/test"

const __dirname = import.meta.dirname

// Lua handlers: "Load" pulls the model in via WeaveDrive, "Ask" runs inference on it.
const src_data = `
Llama = require(".Llama")
Llama.logLevel = 4

Handlers.add("Load", "Load", function (msg)
  -- attested Arweave data is exposed under /data/ by WeaveDrive
  Llama.load("/data/" .. msg.ModelID)
  msg.reply({ Data = "true" })
end)

Handlers.add("Ask", "Ask", function (msg)
  Llama.setPrompt(msg.Q)
  -- generate up to 50 tokens
  msg.reply({ Data = Llama.run(50) })
end)`

describe("LLM", function () {
  it("should infer with TinyLlama", async () => {
    const ao = await new AO().init(acc[0])

    // upload the GGUF model to Arweave and keep its data ID
    const model = readFileSync(resolve(__dirname, "models/tinyllama.gguf"))
    const { id } = await ao.ar.post({ data: model })

    // deploy the Llama AOS module shipped with the wao package
    const data = readFileSync(
      resolve(__dirname, "../node_modules/wao/esm/lua/llama.wasm"),
    )
    const { id: modid } = await ao.postModule({
      data,
      tags: { "Memory-Limit": "1-gb" },
    })

    // spawn a process with WeaveDrive enabled and this wallet as the attestor
    const { p, pid, err } = await ao.deploy({
      tags: { Extension: "WeaveDrive", Attestor: ao.ar.addr },
      module: modid,
      src_data,
    })

    // attest the model transaction so the process can read it via WeaveDrive
    await ao.attest({ id })
    await p.m("Load", { ModelID: id })
    console.log(await p.d("Ask", { Q: "How are you?" }, false))
  })
})
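You can run the test with Node's built-in test runner (the wao project template may already define a test script in package.json):
node --test test
TinyLlama-1.1B-Chat-v1.0 was trained on a Zephyr-style chat template, so wrapping the question in that template usually yields a more coherent reply than a bare prompt. As a minimal sketch, you could replace the final Ask call in the test above with something like the following; the <|system|>/<|user|>/<|assistant|> tags come from the TinyLlama model card, not from the Llama module:
const prompt = [
  "<|system|>",
  "You are a helpful assistant.</s>",
  "<|user|>",
  "How are you?</s>",
  "<|assistant|>",
].join("\n")
console.log(await p.d("Ask", { Q: prompt }, false))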
phi-2-GGUF would be a much better model for chat, but it is a bit too heavy for CPU inference. Download it into the same test/models directory.
curl -L -o test/models/phi2.gguf "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf?download=true"
it("should infer with Phi2", async () => {
const ao = await new AO().init(acc[0])
const model = readFileSync(resolve(__dirname, "models/phi2.gguf"))
const { id } = await ao.ar.post({ data: model })
const data = readFileSync(
resolve(__dirname, "../node_modules/wao/esm/lua/llama.wasm"),
)
const { id: modid } = await ao.postModule({
data,
tags: { "Memory-Limit": "2-gb" }, // the model size is more than 1GB
})
const { p, pid, err } = await ao.deploy({
tags: { Extension: "WeaveDrive", Attestor: ao.ar.addr },
module: modid,
src_data,
})
await ao.attest({ id })
await p.m("Load", { ModelID: id })
console.log(await p.d("Ask", { Q: "How are you?" }, false))
})
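Phi-2's model card suggests an "Instruct: ... Output:" prompt format for question answering, which tends to work better than a bare question. A hedged variant of the Ask call for the Phi-2 test (the format is the model's convention, not part of the Llama module):
const prompt = "Instruct: How are you?\nOutput:"
console.log(await p.d("Ask", { Q: prompt }, false))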