Rename embedchain to mem0 and open sourcing code for long term memory (#1474)

Co-authored-by: Deshraj Yadav <deshrajdry@gmail.com>
This commit is contained in:
Taranjeet Singh
2024-07-12 07:51:33 -07:00
committed by GitHub
parent 83e8c97295
commit f842a92e25
665 changed files with 9427 additions and 6592 deletions

View File

@@ -0,0 +1,4 @@
.env
app.db
configs/**.yaml
db

View File

@@ -0,0 +1,4 @@
.env
app.db
configs/**.yaml
db

View File

@@ -0,0 +1,15 @@
# Container image for the REST API.
FROM python:3.11-slim

WORKDIR /app

# Install dependencies before copying the rest of the sources so this layer
# is only rebuilt when requirements.txt changes.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

COPY . /app

# Port that uvicorn binds to in CMD below.
EXPOSE 8080

# Use the key=value form; the whitespace-separated `ENV NAME embedchain`
# syntax is a legacy format that the Dockerfile reference discourages.
ENV NAME=embedchain

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

View File

@@ -0,0 +1,21 @@
## Single command to rule them all,
```bash
docker run -d --name embedchain -p 8080:8080 embedchain/rest-api:latest
```
### To run the app locally,
```bash
# will help reload on changes
DEVELOPMENT=True python -m main
```
### Using docker (locally),
```bash
docker build -t embedchain/rest-api:latest .
docker run -d --name embedchain -p 8080:8080 embedchain/rest-api:latest
docker image push embedchain/rest-api:latest
```

View File

View File

@@ -0,0 +1,5 @@
{
"version": "1",
"name": "ec-rest-api",
"type": "collection"
}

View File

@@ -0,0 +1,18 @@
meta {
name: default_add
type: http
seq: 3
}
post {
url: http://localhost:8080/add
body: json
auth: none
}
body:json {
{
"source": "source_url",
"data_type": "data_type"
}
}

View File

@@ -0,0 +1,17 @@
meta {
name: default_chat
type: http
seq: 4
}
post {
url: http://localhost:8080/chat
body: json
auth: none
}
body:json {
{
"message": "message"
}
}

View File

@@ -0,0 +1,17 @@
meta {
name: default_query
type: http
seq: 2
}
post {
url: http://localhost:8080/query
body: json
auth: none
}
body:json {
{
"query": "Who is Elon Musk?"
}
}

View File

@@ -0,0 +1,11 @@
meta {
name: ping
type: http
seq: 1
}
get {
url: http://localhost:8080/ping
body: json
auth: none
}

View File

@@ -0,0 +1,3 @@
### Config directory
Here, all the YAML files will get stored.

View File

@@ -0,0 +1,11 @@
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
# SQLite database file, resolved relative to the process working directory.
SQLALCHEMY_DATABASE_URI = "sqlite:///./app.db"

# check_same_thread=False is handed to the SQLite driver through connect_args.
engine = create_engine(
    SQLALCHEMY_DATABASE_URI,
    connect_args=dict(check_same_thread=False),
)

# Factory for sessions bound to the engine above; commits and flushes are
# left to the caller.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

# Declarative base class that the ORM models inherit from.
Base = declarative_base()

View File

@@ -0,0 +1,17 @@
app:
config:
id: 'default'
llm:
provider: gpt4all
config:
model: 'orca-mini-3b-gguf2-q4_0.gguf'
temperature: 0.5
max_tokens: 1000
top_p: 1
stream: false
embedder:
provider: gpt4all
config:
model: 'all-MiniLM-L6-v2'

View File

@@ -0,0 +1,326 @@
import logging
import os
import aiofiles
import yaml
from database import Base, SessionLocal, engine
from fastapi import Depends, FastAPI, HTTPException, UploadFile
from models import DefaultResponse, DeployAppRequest, QueryApp, SourceApp
from services import get_app, get_apps, remove_app, save_app
from sqlalchemy.orm import Session
from utils import generate_error_message_for_api_keys
from embedchain import App
from embedchain.client import Client
# Module-level logger used by the endpoint error handlers below.
logger = logging.getLogger(__name__)

# Import-time side effect: issue CREATE TABLE for every model registered on
# Base against the configured engine.
Base.metadata.create_all(bind=engine)
def get_db():
    """Yield a database session and close it once the consumer is done.

    Used as a FastAPI dependency (``Depends(get_db)``) by the endpoints in
    this module.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs whether the consumer finished normally or raised.
        session.close()
# The ASGI application object. Every route in this module is registered on
# it, and uvicorn loads it as "main:app". The metadata passed here is what
# the generated API documentation displays.
app = FastAPI(
    title="Embedchain REST API",
    description="This is the REST API for Embedchain.",
    version="0.0.1",
    license_info={
        "name": "Apache 2.0",
        "url": "https://github.com/embedchain/embedchain/blob/main/LICENSE",
    },
)
@app.get("/ping", tags=["Utility"])
def check_status():
    """
    Endpoint to check the status of the API
    """
    # Fixed payload; it does not touch the database or any app.
    pong = {"ping": "pong"}
    return pong
@app.get("/apps", tags=["Apps"])
async def get_all_apps(db: Session = Depends(get_db)):
    """
    Get all apps.
    """
    # get_apps is called with its default skip/limit, so only the first page
    # of registered apps is returned.
    return {"results": get_apps(db)}
@app.post("/create", tags=["Apps"], response_model=DefaultResponse)
async def create_app_using_default_config(app_id: str, config: UploadFile = None, db: Session = Depends(get_db)):
    """
    Create a new app using App ID.
    If you don't provide a config file, Embedchain will use the default config file\n
    which uses opensource GPT4ALL model.\n
    app_id: The ID of the app.\n
    config: The YAML config file to create an App.\n
    """
    # NOTE: FastAPI uses the docstring above as the endpoint description in
    # the generated docs, so implementation notes live in comments instead.
    #
    # Every failure is reported as an HTTP 400. Deliberate HTTPExceptions are
    # re-raised untouched so the client sees the specific reason rather than
    # a re-wrapped "Error creating app: ..." message.
    try:
        if app_id is None:
            raise HTTPException(detail="App ID not provided.", status_code=400)

        if get_app(db, app_id) is not None:
            raise HTTPException(detail=f"App with id '{app_id}' already exists.", status_code=400)

        # Apps created without an uploaded config share the bundled default.
        yaml_path = "default.yaml"
        if config is not None:
            # app_id becomes part of a file name below; reject anything that
            # could escape the configs/ directory.
            if os.path.basename(app_id) != app_id or "\\" in app_id:
                raise HTTPException(detail="App ID must not contain path separators.", status_code=400)

            contents = await config.read()
            try:
                # Parse only to verify the upload is well-formed YAML; the
                # parsed result is not used.
                yaml.safe_load(contents)
            except yaml.YAMLError as exc:
                raise HTTPException(detail=f"Error parsing YAML: {exc}", status_code=400) from exc

            # TODO: validate the config yaml file here
            yaml_path = f"configs/{app_id}.yaml"
            async with aiofiles.open(yaml_path, mode="w") as file_out:
                await file_out.write(str(contents, "utf-8"))

        save_app(db, app_id, yaml_path)
        return DefaultResponse(response=f"App created successfully. App ID: {app_id}")
    except HTTPException:
        raise
    except Exception as e:
        logger.warning(str(e))
        raise HTTPException(detail=f"Error creating app: {str(e)}", status_code=400) from e
@app.get(
    "/{app_id}/data",
    tags=["Apps"],
)
async def get_datasources_associated_with_app_id(app_id: str, db: Session = Depends(get_db)):
    """
    Get all data sources for an app.\n
    app_id: The ID of the app. Use "default" for the default app.\n
    """
    # Every failure is reported as an HTTP 400; only the detail text differs.
    try:
        if app_id is None:
            raise HTTPException(
                detail="App ID not provided. If you want to use the default app, use 'default' as the app_id.",
                status_code=400,
            )

        db_app = get_app(db, app_id)
        if db_app is None:
            raise HTTPException(detail=f"App with id {app_id} does not exist, please create it first.", status_code=400)

        # db_app.config holds the path of the YAML config stored for this app.
        app = App.from_config(config_path=db_app.config)
        response = app.get_data_sources()
        return {"results": response}
    except HTTPException:
        # Deliberate client errors raised above must reach the caller
        # unchanged instead of being re-wrapped by the generic handler below.
        raise
    except ValueError as ve:
        logger.warning(str(ve))
        raise HTTPException(
            detail=generate_error_message_for_api_keys(ve),
            status_code=400,
        ) from ve
    except Exception as e:
        logger.warning(str(e))
        raise HTTPException(detail=f"Error occurred: {str(e)}", status_code=400) from e
@app.post(
    "/{app_id}/add",
    tags=["Apps"],
    response_model=DefaultResponse,
)
async def add_datasource_to_an_app(body: SourceApp, app_id: str, db: Session = Depends(get_db)):
    """
    Add a source to an existing app.\n
    app_id: The ID of the app. Use "default" for the default app.\n
    source: The source to add.\n
    data_type: The data type of the source. Remove it if you want Embedchain to detect it automatically.\n
    """
    # Every failure is reported as an HTTP 400; only the detail text differs.
    try:
        if app_id is None:
            raise HTTPException(
                detail="App ID not provided. If you want to use the default app, use 'default' as the app_id.",
                status_code=400,
            )

        db_app = get_app(db, app_id)
        if db_app is None:
            raise HTTPException(detail=f"App with id {app_id} does not exist, please create it first.", status_code=400)

        # db_app.config holds the path of the YAML config stored for this app.
        app = App.from_config(config_path=db_app.config)
        response = app.add(source=body.source, data_type=body.data_type)
        return DefaultResponse(response=response)
    except HTTPException:
        # Deliberate client errors raised above must reach the caller
        # unchanged instead of being re-wrapped by the generic handler below.
        raise
    except ValueError as ve:
        logger.warning(str(ve))
        raise HTTPException(
            detail=generate_error_message_for_api_keys(ve),
            status_code=400,
        ) from ve
    except Exception as e:
        logger.warning(str(e))
        raise HTTPException(detail=f"Error occurred: {str(e)}", status_code=400) from e
@app.post(
    "/{app_id}/query",
    tags=["Apps"],
    response_model=DefaultResponse,
)
async def query_an_app(body: QueryApp, app_id: str, db: Session = Depends(get_db)):
    """
    Query an existing app.\n
    app_id: The ID of the app. Use "default" for the default app.\n
    query: The query that you want to ask the App.\n
    """
    # Every failure is reported as an HTTP 400; only the detail text differs.
    try:
        if app_id is None:
            raise HTTPException(
                detail="App ID not provided. If you want to use the default app, use 'default' as the app_id.",
                status_code=400,
            )

        db_app = get_app(db, app_id)
        if db_app is None:
            raise HTTPException(detail=f"App with id {app_id} does not exist, please create it first.", status_code=400)

        # db_app.config holds the path of the YAML config stored for this app.
        app = App.from_config(config_path=db_app.config)
        response = app.query(body.query)
        return DefaultResponse(response=response)
    except HTTPException:
        # Deliberate client errors raised above must reach the caller
        # unchanged instead of being re-wrapped by the generic handler below.
        raise
    except ValueError as ve:
        logger.warning(str(ve))
        raise HTTPException(
            detail=generate_error_message_for_api_keys(ve),
            status_code=400,
        ) from ve
    except Exception as e:
        logger.warning(str(e))
        raise HTTPException(detail=f"Error occurred: {str(e)}", status_code=400) from e
# FIXME: The chat implementation of Embedchain needs to be modified to work with the REST API.
# @app.post(
# "/{app_id}/chat",
# tags=["Apps"],
# response_model=DefaultResponse,
# )
# async def chat_with_an_app(body: MessageApp, app_id: str, db: Session = Depends(get_db)):
# """
# Query an existing app.\n
# app_id: The ID of the app. Use "default" for the default app.\n
# message: The message that you want to send to the App.\n
# """
# try:
# if app_id is None:
# raise HTTPException(
# detail="App ID not provided. If you want to use the default app, use 'default' as the app_id.",
# status_code=400,
# )
# db_app = get_app(db, app_id)
# if db_app is None:
# raise HTTPException(
# detail=f"App with id {app_id} does not exist, please create it first.",
# status_code=400
# )
# app = App.from_config(config_path=db_app.config)
# response = app.chat(body.message)
# return DefaultResponse(response=response)
# except ValueError as ve:
# raise HTTPException(
# detail=generate_error_message_for_api_keys(ve),
# status_code=400,
# )
# except Exception as e:
# raise HTTPException(detail=f"Error occurred: {str(e)}", status_code=400)
@app.post(
    "/{app_id}/deploy",
    tags=["Apps"],
    response_model=DefaultResponse,
)
async def deploy_app(body: DeployAppRequest, app_id: str, db: Session = Depends(get_db)):
    """
    Deploy an existing app.\n
    app_id: The ID of the app. Use "default" for the default app.\n
    api_key: The API key to use for deployment. If not provided,
    Embedchain will use the API key previously used (if any).\n
    """
    # Every failure is reported as an HTTP 400; only the detail text differs.
    try:
        if app_id is None:
            raise HTTPException(
                detail="App ID not provided. If you want to use the default app, use 'default' as the app_id.",
                status_code=400,
            )

        db_app = get_app(db, app_id)
        if db_app is None:
            raise HTTPException(detail=f"App with id {app_id} does not exist, please create it first.", status_code=400)

        # db_app.config holds the path of the YAML config stored for this app.
        app = App.from_config(config_path=db_app.config)

        api_key = body.api_key
        # this will save the api key in the embedchain.db
        Client(api_key=api_key)

        app.deploy()
        return DefaultResponse(response="App deployed successfully.")
    except HTTPException:
        # Deliberate client errors raised above must reach the caller
        # unchanged instead of being re-wrapped by the generic handler below.
        raise
    except ValueError as ve:
        logger.warning(str(ve))
        raise HTTPException(
            detail=generate_error_message_for_api_keys(ve),
            status_code=400,
        ) from ve
    except Exception as e:
        logger.warning(str(e))
        raise HTTPException(detail=f"Error occurred: {str(e)}", status_code=400) from e
@app.delete(
    "/{app_id}/delete",
    tags=["Apps"],
    response_model=DefaultResponse,
)
async def delete_app(app_id: str, db: Session = Depends(get_db)):
    """
    Delete an existing app.\n
    app_id: The ID of the app to be deleted.
    """
    # Every failure is reported as an HTTP 400; only the detail text differs.
    try:
        if app_id is None:
            raise HTTPException(
                detail="App ID not provided. If you want to use the default app, use 'default' as the app_id.",
                status_code=400,
            )

        db_app = get_app(db, app_id)
        if db_app is None:
            raise HTTPException(detail=f"App with id {app_id} does not exist, please create it first.", status_code=400)

        # db_app.config holds the path of the YAML config stored for this app.
        app = App.from_config(config_path=db_app.config)

        # reset app.db
        app.db.reset()

        # Only drop the registry row once the app's own store has been reset.
        remove_app(db, app_id)
        return DefaultResponse(response=f"App with id {app_id} deleted successfully.")
    except HTTPException:
        # Deliberate client errors raised above must reach the caller
        # unchanged instead of being re-wrapped by the generic handler below.
        raise
    except Exception as e:
        # Log like the other endpoints so failed deletions are visible.
        logger.warning(str(e))
        raise HTTPException(detail=f"Error occurred: {str(e)}", status_code=400) from e
if __name__ == "__main__":
    import uvicorn

    # os.getenv returns a string, and every non-empty string (including the
    # default "False") is truthy, so bool() would always enable reload.
    # Parse the flag explicitly instead.
    is_dev = os.getenv("DEVELOPMENT", "False").strip().lower() in {"1", "true", "yes", "on"}
    uvicorn.run("main:app", host="0.0.0.0", port=8080, reload=is_dev)

View File

@@ -0,0 +1,46 @@
from typing import Optional
from database import Base
from pydantic import BaseModel, Field
from sqlalchemy import Column, Integer, String
class QueryApp(BaseModel):
    # Request body carrying the question to ask an app.
    # (A `#` comment rather than a docstring so the generated JSON schema
    # stays unchanged.)
    query: str = Field(default="", description="The query that you want to ask the App.")

    # Example payload shown in the generated API documentation.
    model_config = {"json_schema_extra": {"example": {"query": "Who is Elon Musk?"}}}
class SourceApp(BaseModel):
    # Request body describing a data source to add to an app.
    # (A `#` comment rather than a docstring so the generated JSON schema
    # stays unchanged.)
    source: str = Field(default="", description="The source that you want to add to the App.")
    data_type: Optional[str] = Field(
        default="",
        description="The type of data to add, remove it for autosense.",
    )

    # Example payload shown in the generated API documentation.
    model_config = {
        "json_schema_extra": {
            "example": {"source": "https://en.wikipedia.org/wiki/Elon_Musk"},
        },
    }
class DeployAppRequest(BaseModel):
    # Request body carrying the API key used when deploying an app.
    # (A `#` comment rather than a docstring so the generated JSON schema
    # stays unchanged.)
    api_key: str = Field(default="", description="The Embedchain API key for App deployments.")

    # Example payload shown in the generated API documentation.
    model_config = {
        "json_schema_extra": {
            "example": {"api_key": "ec-xxx"},
        },
    }
class MessageApp(BaseModel):
    # Request body carrying a chat message for an app.
    # (A `#` comment rather than a docstring so the generated JSON schema
    # stays unchanged.)
    message: str = Field(default="", description="The message that you want to send to the App.")
class DefaultResponse(BaseModel):
    # Generic response envelope: a single human-readable string.
    # (A `#` comment rather than a docstring so the generated JSON schema
    # stays unchanged.)
    response: str
class AppModel(Base):
    # ORM row registering one app: its public identifier and the path of the
    # YAML config it was created with.
    __tablename__ = "apps"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True, index=True)
    # Caller-chosen identifier; must be unique across apps.
    app_id = Column(String, unique=True, index=True)
    # Path to the app's YAML config. Deliberately NOT unique: every app
    # created without an uploaded config is stored with the same
    # "default.yaml" path, so a unique constraint here would make the second
    # such insert fail.
    # NOTE(review): tables that already exist keep their old unique index;
    # only newly created databases pick up this change.
    config = Column(String, index=True)

View File

@@ -0,0 +1,24 @@
fastapi==0.104.0
uvicorn==0.23.2
streamlit==1.29.0
# NOTE(review): embedchain is pinned again below with extras at the same
# version; this plain pin looks redundant.
embedchain==0.1.3
# NOTE(review): slack-sdk is listed twice in this file with the same version.
slack-sdk==3.21.3
flask==2.3.3
fastapi-poe==0.0.16
discord==2.3.2
twilio==8.5.0
# NOTE(review): pip normalises "huggingface-hub" and "huggingface_hub" to the
# same project, so this 0.17.3 pin conflicts with the 0.23.0 pin further
# down. Decide which version is intended and drop the other.
huggingface-hub==0.17.3
embedchain[community, opensource, elasticsearch, opensearch, weaviate, pinecone, qdrant, images, cohere, together, milvus, vertexai, llama2, gmail, json]==0.1.3
sqlalchemy==2.0.22
python-multipart==0.0.6
youtube-transcript-api==0.6.1
pytube==15.0.0
beautifulsoup4==4.12.3
slack-sdk==3.21.3
huggingface_hub==0.23.0
gitpython==3.1.38
yt_dlp==2023.11.14
PyGithub==1.59.1
feedparser==6.0.10
newspaper3k==0.2.8
listparser==0.19

View File

@@ -0,0 +1,33 @@
app:
config:
id: 'default-app'
llm:
provider: openai
config:
model: 'gpt-3.5-turbo'
temperature: 0.5
max_tokens: 1000
top_p: 1
stream: false
template: |
Use the following pieces of context to answer the query at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
$context
Query: $query
Helpful Answer:
vectordb:
provider: chroma
config:
collection_name: 'rest-api-app'
dir: db
allow_reset: true
embedder:
provider: openai
config:
model: 'text-embedding-ada-002'

View File

@@ -0,0 +1,25 @@
from models import AppModel
from sqlalchemy.orm import Session
def get_app(db: Session, app_id: str):
    """Return the first ``AppModel`` row whose ``app_id`` matches, or ``None``."""
    matching = db.query(AppModel).filter(AppModel.app_id == app_id)
    return matching.first()
def get_apps(db: Session, skip: int = 0, limit: int = 100):
    """Return a list of at most ``limit`` ``AppModel`` rows, skipping the first ``skip``."""
    page = db.query(AppModel).offset(skip).limit(limit)
    return page.all()
def save_app(db: Session, app_id: str, config: str):
    """Insert a new ``AppModel`` row, commit it, and return the refreshed row.

    ``config`` is stored as given; callers in this project pass the path of
    the app's YAML config file.
    """
    record = AppModel(config=config, app_id=app_id)
    db.add(record)
    db.commit()
    # Reload the row so database-generated values (such as the primary key)
    # are populated on the returned object.
    db.refresh(record)
    return record
def remove_app(db: Session, app_id: str):
    """Delete the ``AppModel`` row for ``app_id`` and commit.

    Returns the deleted row, or ``None`` when no row matches. In the
    no-match case nothing is deleted or committed.
    """
    db_app = db.query(AppModel).filter(AppModel.app_id == app_id).first()
    if db_app is None:
        # Passing None to Session.delete() raises; treat a missing app as a
        # no-op and signal it through the return value instead.
        return None
    db.delete(db_app)
    db.commit()
    return db_app

View File

@@ -0,0 +1,22 @@
def generate_error_message_for_api_keys(error: ValueError) -> str:
    """Turn a ``ValueError`` into a user-facing hint about missing API keys.

    If the error text mentions any known credential environment variable,
    return instructions for passing those variables to the Docker container,
    using the first match in the example command. Otherwise return the error
    text prefixed with ``"Error: "``.
    """
    # Order matters: matches are reported in this order, and the first match
    # is the one used in the example command.
    known_env_vars = (
        "OPENAI_API_KEY",
        "OPENAI_API_TYPE",
        "OPENAI_API_BASE",
        "OPENAI_API_VERSION",
        "COHERE_API_KEY",
        "TOGETHER_API_KEY",
        "ANTHROPIC_API_KEY",
        "JINACHAT_API_KEY",
        "HUGGINGFACE_ACCESS_TOKEN",
        "REPLICATE_API_TOKEN",
    )
    error_text = str(error)
    mentioned = [name for name in known_env_vars if name in error_text]

    if not mentioned:
        return "Error: " + error_text

    joined = ", ".join(mentioned)
    return (
        f"Please set the {joined} environment variable(s) when running the Docker container.\n"
        f"Example: `docker run -e {mentioned[0]}=xxx embedchain/rest-api:latest`\n"
    )