Multimodal Support NodeSDK (#2320)

This commit is contained in:
Saket Aryan
2025-03-06 17:50:41 +05:30
committed by GitHub
parent 2c31a930a3
commit 6d7ef3ae45
12 changed files with 248 additions and 26 deletions

View File

@@ -27,9 +27,15 @@ export class AnthropicLLM implements LLM {
model: this.model,
messages: otherMessages.map((msg) => ({
role: msg.role as "user" | "assistant",
content: msg.content,
content:
typeof msg.content === "string"
? msg.content
: msg.content.image_url.url,
})),
system: systemMessage?.content,
system:
typeof systemMessage?.content === "string"
? systemMessage.content
: undefined,
max_tokens: 4096,
});

View File

@@ -23,7 +23,10 @@ export class GroqLLM implements LLM {
model: this.model,
messages: messages.map((msg) => ({
role: msg.role as "system" | "user" | "assistant",
content: msg.content,
content:
typeof msg.content === "string"
? msg.content
: JSON.stringify(msg.content),
})),
response_format: responseFormat as { type: "text" | "json_object" },
});
@@ -36,7 +39,10 @@ export class GroqLLM implements LLM {
model: this.model,
messages: messages.map((msg) => ({
role: msg.role as "system" | "user" | "assistant",
content: msg.content,
content:
typeof msg.content === "string"
? msg.content
: JSON.stringify(msg.content),
})),
});

View File

@@ -8,7 +8,7 @@ export class OpenAILLM implements LLM {
constructor(config: LLMConfig) {
this.openai = new OpenAI({ apiKey: config.apiKey });
this.model = config.model || "gpt-4-turbo-preview";
this.model = config.model || "gpt-4o-mini";
}
async generateResponse(
@@ -17,10 +17,16 @@ export class OpenAILLM implements LLM {
tools?: any[],
): Promise<string | LLMResponse> {
const completion = await this.openai.chat.completions.create({
messages: messages.map((msg) => ({
role: msg.role as "system" | "user" | "assistant",
content: msg.content,
})),
messages: messages.map((msg) => {
const role = msg.role as "system" | "user" | "assistant";
return {
role,
content:
typeof msg.content === "string"
? msg.content
: JSON.stringify(msg.content),
};
}),
model: this.model,
response_format: responseFormat as { type: "text" | "json_object" },
...(tools && { tools, tool_choice: "auto" }),
@@ -44,10 +50,16 @@ export class OpenAILLM implements LLM {
async generateChat(messages: Message[]): Promise<LLMResponse> {
const completion = await this.openai.chat.completions.create({
messages: messages.map((msg) => ({
role: msg.role as "system" | "user" | "assistant",
content: msg.content,
})),
messages: messages.map((msg) => {
const role = msg.role as "system" | "user" | "assistant";
return {
role,
content:
typeof msg.content === "string"
? msg.content
: JSON.stringify(msg.content),
};
}),
model: this.model,
});
const response = completion.choices[0].message;

View File

@@ -19,7 +19,10 @@ export class OpenAIStructuredLLM implements LLM {
const completion = await this.openai.chat.completions.create({
messages: messages.map((msg) => ({
role: msg.role as "system" | "user" | "assistant",
content: msg.content,
content:
typeof msg.content === "string"
? msg.content
: JSON.stringify(msg.content),
})),
model: this.model,
...(tools
@@ -63,7 +66,10 @@ export class OpenAIStructuredLLM implements LLM {
const completion = await this.openai.chat.completions.create({
messages: messages.map((msg) => ({
role: msg.role as "system" | "user" | "assistant",
content: msg.content,
content:
typeof msg.content === "string"
? msg.content
: JSON.stringify(msg.content),
})),
model: this.model,
});

View File

@@ -31,6 +31,7 @@ import {
DeleteAllMemoryOptions,
GetAllMemoryOptions,
} from "./memory.types";
import { parse_vision_messages } from "../utils/memory";
export class Memory {
private config: MemoryConfig;
@@ -109,9 +110,11 @@ export class Memory {
? (messages as Message[])
: [{ role: "user", content: messages }];
const final_parsedMessages = await parse_vision_messages(parsedMessages);
// Add to vector store
const vectorStoreResult = await this.addToVectorStore(
parsedMessages,
final_parsedMessages,
metadata,
filters,
);
@@ -121,7 +124,7 @@ export class Memory {
if (this.graphMemory) {
try {
graphResult = await this.graphMemory.add(
parsedMessages.map((m) => m.content).join("\n"),
final_parsedMessages.map((m) => m.content).join("\n"),
filters,
);
} catch (error) {

View File

@@ -1,8 +1,15 @@
import { z } from "zod";
export interface MultiModalMessages {
type: "image_url";
image_url: {
url: string;
};
}
export interface Message {
role: string;
content: string;
content: string | MultiModalMessages;
}
export interface EmbeddingConfig {

View File

@@ -0,0 +1,48 @@
import { OpenAILLM } from "../llms/openai";
import { Message } from "../types";
const get_image_description = async (image_url: string) => {
const llm = new OpenAILLM({
apiKey: process.env.OPENAI_API_KEY,
});
const response = await llm.generateResponse([
{
role: "user",
content:
"Provide a description of the image and do not include any additional text.",
},
{
role: "user",
content: { type: "image_url", image_url: { url: image_url } },
},
]);
return response;
};
/**
 * Pre-process a message list so it contains only text content: each
 * multimodal (image_url) message is replaced by an LLM-generated textual
 * description of its image, while system and plain-text messages pass
 * through unchanged and order is preserved.
 *
 * @param messages - Mixed text / multimodal messages.
 * @returns A new array of text-only messages in the original order.
 */
const parse_vision_messages = async (messages: Message[]): Promise<Message[]> => {
  const parsed_messages: Message[] = [];
  for (const message of messages) {
    // Bug fix: system messages were previously dropped from the output
    // entirely (the old `role !== "system"` guard had no else branch),
    // silently losing caller-supplied system prompts. Preserve them as-is.
    if (message.role === "system") {
      parsed_messages.push(message);
      continue;
    }
    if (
      typeof message.content === "object" &&
      message.content.type === "image_url"
    ) {
      // Replace the image with a textual description from the vision LLM.
      const description = await get_image_description(
        message.content.image_url.url,
      );
      parsed_messages.push({
        role: message.role,
        content:
          typeof description === "string"
            ? description
            : JSON.stringify(description),
      });
    } else {
      parsed_messages.push(message);
    }
  }
  return parsed_messages;
};
export { parse_vision_messages };