From e10a5096452d14d51079dead057a6dc7d55d6910 Mon Sep 17 00:00:00 2001 From: Mrinank Bhowmick <77621953+Mrinank-Bhowmick@users.noreply.github.com> Date: Fri, 6 Jun 2025 21:35:40 +0530 Subject: [PATCH] Added cloudflare vector-store (#2607) --- docs/components/vectordbs/dbs/vectorize.mdx | 45 ++ docs/components/vectordbs/overview.mdx | 2 +- mem0-ts/src/oss/package.json | 2 + mem0-ts/src/oss/src/index.ts | 1 + mem0-ts/src/oss/src/utils/factory.ts | 3 + .../src/oss/src/vector_stores/vectorize.ts | 436 ++++++++++++++++++ 6 files changed, 488 insertions(+), 1 deletion(-) create mode 100644 docs/components/vectordbs/dbs/vectorize.mdx create mode 100644 mem0-ts/src/oss/src/vector_stores/vectorize.ts diff --git a/docs/components/vectordbs/dbs/vectorize.mdx b/docs/components/vectordbs/dbs/vectorize.mdx new file mode 100644 index 00000000..de520529 --- /dev/null +++ b/docs/components/vectordbs/dbs/vectorize.mdx @@ -0,0 +1,45 @@ +[Cloudflare Vectorize](https://developers.cloudflare.com/vectorize/) is a vector database offering from Cloudflare, allowing you to build AI-powered applications with vector embeddings. + +### Usage + + +```typescript TypeScript +import { Memory } from 'mem0ai/oss'; + +const config = { + vectorStore: { + provider: 'vectorize', + config: { + indexName: 'my-memory-index', + accountId: 'your-cloudflare-account-id', + apiKey: 'your-cloudflare-api-key', + dimension: 1536, // Optional: defaults to 1536 + }, + }, +}; + +const memory = new Memory(config); +const messages = [ + {"role": "user", "content": "I'm looking for a good book to read."}, + {"role": "assistant", "content": "Sure, what genre are you interested in?"}, + {"role": "user", "content": "I enjoy fantasy novels with strong world-building."}, + {"role": "assistant", "content": "Great! 
I'll keep that in mind for future recommendations."} +] +await memory.add(messages, { userId: "bob", metadata: { interest: "books" } }); +``` + + +### Config + +Let's see the available parameters for the `vectorize` config: + + + +| Parameter | Description | Default Value | +| --- | --- | --- | +| `indexName` | The name of the Vectorize index | `None` (Required) | +| `accountId` | Your Cloudflare account ID | `None` (Required) | +| `apiKey` | Your Cloudflare API token | `None` (Required) | +| `dimension` | Dimensions of the embedding model | `1536` | + + diff --git a/docs/components/vectordbs/overview.mdx b/docs/components/vectordbs/overview.mdx index bf034882..f0d87be1 100644 --- a/docs/components/vectordbs/overview.mdx +++ b/docs/components/vectordbs/overview.mdx @@ -13,7 +13,7 @@ Mem0 includes built-in support for various popular databases. Memory can utilize See the list of supported vector databases below. - The following vector databases are supported in the Python implementation. The TypeScript implementation currently only supports Qdrant, Redis and in-memory vector database. + The following vector databases are supported in the Python implementation. The TypeScript implementation currently only supports Qdrant, Redis, Vectorize and in-memory vector database. 
diff --git a/mem0-ts/src/oss/package.json b/mem0-ts/src/oss/package.json index 15a88382..e2cb5ca6 100644 --- a/mem0-ts/src/oss/package.json +++ b/mem0-ts/src/oss/package.json @@ -21,6 +21,7 @@ "@types/redis": "^4.0.10", "@types/sqlite3": "^3.1.11", "@types/uuid": "^9.0.8", + "cloudflare": "^4.2.0", "dotenv": "^16.4.4", "groq-sdk": "^0.3.0", "openai": "^4.28.0", @@ -31,6 +32,7 @@ "zod": "^3.22.4" }, "devDependencies": { + "@cloudflare/workers-types": "^4.20250504.0", "@types/jest": "^29.5.12", "jest": "^29.7.0", "rimraf": "^5.0.5", diff --git a/mem0-ts/src/oss/src/index.ts b/mem0-ts/src/oss/src/index.ts index 98aafd06..fd914605 100644 --- a/mem0-ts/src/oss/src/index.ts +++ b/mem0-ts/src/oss/src/index.ts @@ -22,4 +22,5 @@ export * from "./vector_stores/qdrant"; export * from "./vector_stores/redis"; export * from "./vector_stores/supabase"; export * from "./vector_stores/langchain"; +export * from "./vector_stores/vectorize"; export * from "./utils/factory"; diff --git a/mem0-ts/src/oss/src/utils/factory.ts b/mem0-ts/src/oss/src/utils/factory.ts index 894963f3..8556f057 100644 --- a/mem0-ts/src/oss/src/utils/factory.ts +++ b/mem0-ts/src/oss/src/utils/factory.ts @@ -16,6 +16,7 @@ import { Embedder } from "../embeddings/base"; import { LLM } from "../llms/base"; import { VectorStore } from "../vector_stores/base"; import { Qdrant } from "../vector_stores/qdrant"; +import { VectorizeDB } from "../vector_stores/vectorize"; import { RedisDB } from "../vector_stores/redis"; import { OllamaLLM } from "../llms/ollama"; import { SupabaseDB } from "../vector_stores/supabase"; @@ -90,6 +91,8 @@ export class VectorStoreFactory { return new SupabaseDB(config as any); case "langchain": return new LangchainVectorStore(config as any); + case "vectorize": + return new VectorizeDB(config as any); default: throw new Error(`Unsupported vector store provider: ${provider}`); } diff --git a/mem0-ts/src/oss/src/vector_stores/vectorize.ts b/mem0-ts/src/oss/src/vector_stores/vectorize.ts new 
file mode 100644
index 00000000..d2b63563
--- /dev/null
+++ b/mem0-ts/src/oss/src/vector_stores/vectorize.ts
@@ -0,0 +1,438 @@
+import Cloudflare from "cloudflare";
+import type { Vectorize, VectorizeVector } from "@cloudflare/workers-types";
+import { VectorStore } from "./base";
+import { SearchFilters, VectorStoreConfig, VectorStoreResult } from "../types";
+
+/** Auxiliary one-dimensional index that holds the persisted user ID record. */
+const MIGRATIONS_INDEX = "memory_migrations";
+
+/** Metadata properties for which a string metadata index is created. */
+const INDEXED_PROPERTIES = ["userId", "agentId", "runId"];
+
+/** Root of the Cloudflare REST API, used directly for NDJSON uploads. */
+const API_BASE_URL = "https://api.cloudflare.com/client/v4";
+
+/** Configuration accepted by `VectorizeDB`. */
+interface VectorizeConfig extends VectorStoreConfig {
+  /** Cloudflare API token, sent as a Bearer token with every upload. */
+  apiKey?: string;
+  /** Name of the Vectorize index that stores the vectors. */
+  indexName: string;
+  /** Cloudflare account ID that owns the index. */
+  accountId: string;
+}
+
+/** Shape of one record in an NDJSON insert request body. */
+interface CloudflareVector {
+  id: string;
+  values: number[];
+  metadata?: Record<string, any>;
+}
+
+/** Converts any thrown value into a printable message. */
+function describeError(error: unknown): string {
+  return error instanceof Error ? error.message : String(error);
+}
+
+/**
+ * Vector store backed by Cloudflare Vectorize.
+ *
+ * Queries and index management go through the `cloudflare` SDK client;
+ * inserts and upserts are POSTed to the REST API as newline-delimited JSON.
+ */
+export class VectorizeDB implements VectorStore {
+  private client: Cloudflare;
+  private dimensions: number;
+  private indexName: string;
+  private accountId: string;
+  /**
+   * Settles when the `initialize()` call started by the constructor is done.
+   * It never rejects (failures are logged instead), so awaiting it only makes
+   * operations wait for index set-up; it cannot make them throw.
+   */
+  private ready: Promise<void>;
+
+  constructor(config: VectorizeConfig) {
+    this.client = new Cloudflare({ apiToken: config.apiKey });
+    this.dimensions = config.dimension || 1536;
+    this.indexName = config.indexName;
+    this.accountId = config.accountId;
+    this.ready = this.initialize().catch(console.error);
+  }
+
+  /**
+   * POSTs records to a Vectorize v2 endpoint as newline-delimited JSON.
+   *
+   * @param indexName - Index to write to.
+   * @param operation - REST operation, `insert` or `upsert`.
+   * @param records - Records to send; an empty list sends nothing.
+   * @throws Error with the HTTP status and response body when the response
+   *   is not OK.
+   */
+  private async postVectors(
+    indexName: string,
+    operation: "insert" | "upsert",
+    records: Array<CloudflareVector | VectorizeVector>
+  ): Promise<void> {
+    if (records.length === 0) return;
+
+    const response = await fetch(
+      `${API_BASE_URL}/accounts/${this.accountId}/vectorize/v2/indexes/${indexName}/${operation}`,
+      {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/x-ndjson",
+          Authorization: `Bearer ${this.client.apiToken}`,
+        },
+        body: records.map((record) => JSON.stringify(record)).join("\n") + "\n",
+      }
+    );
+
+    if (!response.ok) {
+      throw new Error(`${response.status} ${await response.text()}`);
+    }
+  }
+
+  /** Collects the names of every Vectorize index in the account. */
+  private async listIndexNames(): Promise<Set<string>> {
+    const names = new Set<string>();
+    for await (const index of this.client.vectorize.indexes.list({
+      account_id: this.accountId,
+    })) {
+      if (index.name) names.add(index.name);
+    }
+    return names;
+  }
+
+  /** Creates a cosine-metric index with the given name and dimensionality. */
+  private async createIndex(name: string, dimensions: number): Promise<void> {
+    await this.client.vectorize.indexes.create({
+      account_id: this.accountId,
+      name,
+      config: { dimensions, metric: "cosine" },
+    });
+  }
+
+  /** Creates each metadata index in `INDEXED_PROPERTIES` that is missing. */
+  private async ensureMetadataIndexes(): Promise<void> {
+    const listing = await this.client.vectorize.indexes.metadataIndex.list(
+      this.indexName,
+      { account_id: this.accountId }
+    );
+    const existing = new Set<string>();
+    for (const metadataIndex of listing?.metadataIndexes || []) {
+      if (metadataIndex.propertyName) existing.add(metadataIndex.propertyName);
+    }
+
+    for (const propertyName of INDEXED_PROPERTIES) {
+      if (existing.has(propertyName)) continue;
+      await this.client.vectorize.indexes.metadataIndex.create(this.indexName, {
+        account_id: this.accountId,
+        indexType: "string",
+        propertyName,
+      });
+    }
+  }
+
+  /** Runs a similarity query on the main index and maps the matches. */
+  private async queryIndex(
+    vector: number[],
+    topK: number,
+    filters?: SearchFilters
+  ): Promise<VectorStoreResult[]> {
+    const result = await this.client.vectorize.indexes.query(this.indexName, {
+      account_id: this.accountId,
+      vector,
+      filter: filters,
+      returnMetadata: "all",
+      topK,
+    });
+
+    // A missing result or match list is reported as "no matches".
+    return (
+      (result?.matches?.map((match) => ({
+        id: match.id,
+        payload: match.metadata,
+        score: match.score,
+      })) as VectorStoreResult[]) || []
+    );
+  }
+
+  /** Returns the first record of the migrations index, if there is one. */
+  private async findUserRecord(): Promise<any> {
+    const result: any = await this.client.vectorize.indexes.query(
+      MIGRATIONS_INDEX,
+      {
+        account_id: this.accountId,
+        vector: [0],
+        topK: 1,
+        returnMetadata: "all",
+      }
+    );
+    return result?.matches?.[0];
+  }
+
+  /**
+   * Inserts new vectors with their payloads.
+   *
+   * @param vectors - Embeddings, one per record.
+   * @param ids - Record IDs, parallel to `vectors`.
+   * @param payloads - Metadata, parallel to `vectors`; a missing entry is
+   *   stored as `{}`.
+   * @throws Error when the upload fails.
+   */
+  async insert(
+    vectors: number[][],
+    ids: string[],
+    payloads: Record<string, any>[]
+  ): Promise<void> {
+    try {
+      await this.ready;
+      const records: CloudflareVector[] = vectors.map((vector, index) => ({
+        id: ids[index],
+        values: vector,
+        metadata: payloads[index] || {},
+      }));
+      await this.postVectors(this.indexName, "insert", records);
+    } catch (error) {
+      console.error("Error inserting vectors:", error);
+      throw new Error(`Failed to insert vectors: ${describeError(error)}`);
+    }
+  }
+
+  /**
+   * Finds the vectors most similar to `query`.
+   *
+   * @param query - Query embedding.
+   * @param limit - Maximum number of matches to return.
+   * @param filters - Optional metadata filter, passed to Vectorize unchanged.
+   * @returns Matches with ID, metadata payload and score.
+   * @throws Error when the query fails.
+   */
+  async search(
+    query: number[],
+    limit: number = 5,
+    filters?: SearchFilters
+  ): Promise<VectorStoreResult[]> {
+    try {
+      await this.ready;
+      return await this.queryIndex(query, limit, filters);
+    } catch (error) {
+      console.error("Error searching vectors:", error);
+      throw new Error(`Failed to search vectors: ${describeError(error)}`);
+    }
+  }
+
+  /**
+   * Fetches one record by ID.
+   *
+   * @returns The ID and metadata payload, or `null` when nothing is found.
+   * @throws Error when the lookup fails.
+   */
+  async get(vectorId: string): Promise<VectorStoreResult | null> {
+    try {
+      await this.ready;
+      const result = (await this.client.vectorize.indexes.getByIds(
+        this.indexName,
+        {
+          account_id: this.accountId,
+          ids: [vectorId],
+        }
+      )) as any;
+
+      if (!result?.length) return null;
+
+      return {
+        id: vectorId,
+        payload: result[0].metadata,
+      };
+    } catch (error) {
+      console.error("Error getting vector:", error);
+      throw new Error(`Failed to get vector: ${describeError(error)}`);
+    }
+  }
+
+  /**
+   * Upserts one record, replacing its values and metadata.
+   *
+   * @throws Error when the upload fails.
+   */
+  async update(
+    vectorId: string,
+    vector: number[],
+    payload: Record<string, any>
+  ): Promise<void> {
+    try {
+      await this.ready;
+      const record: VectorizeVector = {
+        id: vectorId,
+        values: vector,
+        metadata: payload,
+      };
+      await this.postVectors(this.indexName, "upsert", [record]);
+    } catch (error) {
+      console.error("Error updating vector:", error);
+      throw new Error(`Failed to update vector: ${describeError(error)}`);
+    }
+  }
+
+  /**
+   * Deletes one record by ID.
+   *
+   * @throws Error when the deletion fails.
+   */
+  async delete(vectorId: string): Promise<void> {
+    try {
+      await this.ready;
+      await this.client.vectorize.indexes.deleteByIds(this.indexName, {
+        account_id: this.accountId,
+        ids: [vectorId],
+      });
+    } catch (error) {
+      console.error("Error deleting vector:", error);
+      throw new Error(`Failed to delete vector: ${describeError(error)}`);
+    }
+  }
+
+  /**
+   * Deletes the whole index.
+   *
+   * @throws Error when the deletion fails.
+   */
+  async deleteCol(): Promise<void> {
+    try {
+      await this.ready;
+      await this.client.vectorize.indexes.delete(this.indexName, {
+        account_id: this.accountId,
+      });
+    } catch (error) {
+      console.error("Error deleting collection:", error);
+      throw new Error(`Failed to delete collection: ${describeError(error)}`);
+    }
+  }
+
+  /**
+   * Lists up to `limit` records, optionally filtered by metadata.
+   *
+   * Implemented as a similarity query against an all-zero vector.
+   * NOTE(review): cosine similarity to a zero vector is mathematically
+   * undefined; confirm how Vectorize ranks matches for such a query.
+   *
+   * @returns The records and how many were returned (not the index total).
+   * @throws Error when the query fails.
+   */
+  async list(
+    filters?: SearchFilters,
+    limit: number = 20
+  ): Promise<[VectorStoreResult[], number]> {
+    try {
+      await this.ready;
+      const matches = await this.queryIndex(
+        Array(this.dimensions).fill(0), // Dummy vector for listing
+        limit,
+        filters
+      );
+      return [matches, matches.length];
+    } catch (error) {
+      console.error("Error listing vectors:", error);
+      throw new Error(`Failed to list vectors: ${describeError(error)}`);
+    }
+  }
+
+  /** Builds a random version-4-style UUID string from `Math.random()`. */
+  private generateUUID(): string {
+    return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(
+      /[xy]/g,
+      function (c) {
+        const r = (Math.random() * 16) | 0;
+        const v = c === "x" ? r : (r & 0x3) | 0x8;
+        return v.toString(16);
+      }
+    );
+  }
+
+  /**
+   * Returns the user ID stored in the migrations index, generating and
+   * storing a random one when none exists yet.
+   *
+   * @throws Error when the lookup or the upload fails.
+   */
+  async getUserId(): Promise<string> {
+    try {
+      await this.ready;
+      // Set-up may have failed, so make sure the migrations index exists.
+      if (!(await this.listIndexNames()).has(MIGRATIONS_INDEX)) {
+        await this.createIndex(MIGRATIONS_INDEX, 1);
+      }
+
+      const stored = await this.findUserRecord();
+      if (stored) {
+        return stored.metadata.userId as string;
+      }
+
+      // Generate a random userId if none exists
+      const randomUserId =
+        Math.random().toString(36).substring(2, 15) +
+        Math.random().toString(36).substring(2, 15);
+      const record: VectorizeVector = {
+        id: this.generateUUID(),
+        values: [0],
+        metadata: { userId: randomUserId },
+      };
+      await this.postVectors(MIGRATIONS_INDEX, "upsert", [record]);
+      return randomUserId;
+    } catch (error) {
+      console.error("Error getting user ID:", error);
+      throw new Error(`Failed to get user ID: ${describeError(error)}`);
+    }
+  }
+
+  /**
+   * Stores `userId` in the migrations index, overwriting the existing
+   * record when there is one.
+   *
+   * @throws Error when the lookup or the upload fails.
+   */
+  async setUserId(userId: string): Promise<void> {
+    try {
+      await this.ready;
+      // Reuse the existing record ID so the upsert replaces it.
+      const stored = await this.findUserRecord();
+      const record: VectorizeVector = {
+        id: stored ? stored.id : this.generateUUID(),
+        values: [0],
+        metadata: { userId },
+      };
+      await this.postVectors(MIGRATIONS_INDEX, "upsert", [record]);
+    } catch (error) {
+      console.error("Error setting user ID:", error);
+      throw new Error(`Failed to set user ID: ${describeError(error)}`);
+    }
+  }
+
+  /**
+   * Creates the main index, its metadata indexes and the migrations index
+   * when they do not exist yet.
+   *
+   * @throws Error when listing or creating an index fails.
+   */
+  async initialize(): Promise<void> {
+    try {
+      const indexNames = await this.listIndexNames();
+      if (!indexNames.has(this.indexName)) {
+        await this.createIndex(this.indexName, this.dimensions);
+        indexNames.add(this.indexName);
+      }
+
+      await this.ensureMetadataIndexes();
+
+      if (!indexNames.has(MIGRATIONS_INDEX)) {
+        await this.createIndex(MIGRATIONS_INDEX, 1);
+      }
+    } catch (error) {
+      throw new Error(
+        `Failed to initialize Vectorize index: ${describeError(error)}`
+      );
+    }
+  }
+}