Rename embedchain to mem0 and open sourcing code for long term memory (#1474)

Co-authored-by: Deshraj Yadav <deshrajdry@gmail.com>
This commit is contained in:
Taranjeet Singh
2024-07-12 07:51:33 -07:00
committed by GitHub
parent 83e8c97295
commit f842a92e25
665 changed files with 9427 additions and 6592 deletions

View File

@@ -1 +0,0 @@
OPENAI_API_KEY="your-openai-api-key"

3
.gitignore vendored
View File

@@ -179,3 +179,6 @@ notebooks/*.yaml
# cache db
*.db
# local directories for testing
eval/

View File

@@ -1,20 +0,0 @@
repos:
- repo: https://github.com/psf/black
rev: 23.3.0
hooks:
- id: black
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: 'v0.0.252'
hooks:
- id: ruff
name: ruff
# Respect `exclude` and `extend-exclude` settings.
args: ["--force-exclude"]
- repo: local
hooks:
- id: pytest-check
name: pytest-check
entry: poetry run pytest
language: system
pass_filenames: false
always_run: true

View File

@@ -1,46 +1,26 @@
.PHONY: format sort lint
# Variables
PYTHON := python3
PIP := $(PYTHON) -m pip
PROJECT_NAME := embedchain
RUFF_OPTIONS = --line-length 120
ISORT_OPTIONS = --profile black
# Targets
.PHONY: install format lint clean test ci_lint ci_test coverage
install:
poetry install
# TODO: use a more efficient way to install these packages
install_all:
poetry install --all-extras
poetry run pip install pinecone-text pinecone-client langchain-anthropic "unstructured[local-inference, all-docs]" ollama langchain_together==0.1.3 \
langchain_cohere==0.1.5 deepgram-sdk==3.2.7 langchain-huggingface psutil clarifai==10.0.1 flask==2.3.3 twilio==8.5.0 fastapi-poe==0.0.16 discord==2.3.2 \
slack-sdk==3.21.3 huggingface_hub==0.23.0 gitpython==3.1.38 yt_dlp==2023.11.14 PyGithub==1.59.1 feedparser==6.0.10 newspaper3k==0.2.8 listparser==0.19 \
modal==0.56.4329 dropbox==11.36.2 boto3==1.34.20 youtube-transcript-api==0.6.1 pytube==15.0.0 beautifulsoup4==4.12.3
install_es:
poetry install --extras elasticsearch
install_opensearch:
poetry install --extras opensearch
install_milvus:
poetry install --extras milvus
shell:
poetry shell
py_shell:
poetry run python
# Default target
all: format sort lint
# Format code with ruff
format:
$(PYTHON) -m black .
$(PYTHON) -m isort .
poetry run ruff check . --fix $(RUFF_OPTIONS)
clean:
rm -rf dist build *.egg-info
# Sort imports with isort
sort:
poetry run isort . $(ISORT_OPTIONS)
# Lint code with ruff
lint:
poetry run ruff .
poetry run ruff check . $(RUFF_OPTIONS)
docs:
cd docs && mintlify dev
build:
poetry build
@@ -48,9 +28,5 @@ build:
publish:
poetry publish
# for example: make test file=tests/test_factory.py
test:
poetry run pytest $(file)
coverage:
poetry run pytest --cov=$(PROJECT_NAME) --cov-report=xml
clean:
poetry run rm -rf dist

278
README.md
View File

@@ -1,125 +1,197 @@
<p align="center">
<img src="docs/logo/dark.svg" width="400px" alt="Embedchain Logo">
</p>
# Mem0: Long-Term Memory for LLMs
<p align="center">
<a href="https://pypi.org/project/embedchain/">
<img src="https://img.shields.io/pypi/v/embedchain" alt="PyPI">
</a>
<a href="https://pepy.tech/project/embedchain">
<img src="https://static.pepy.tech/badge/embedchain" alt="Downloads">
</a>
<a href="https://embedchain.ai/slack">
<img src="https://img.shields.io/badge/slack-embedchain-brightgreen.svg?logo=slack" alt="Slack">
</a>
<a href="https://embedchain.ai/discord">
<img src="https://dcbadge.vercel.app/api/server/6PzXDgEjG5?style=flat" alt="Discord">
</a>
<a href="https://twitter.com/embedchain">
<img src="https://img.shields.io/twitter/follow/embedchain" alt="Twitter">
</a>
<a href="https://colab.research.google.com/drive/138lMWhENGeEu7Q1-6lNbNTHGLZXBBz_B?usp=sharing">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab">
</a>
<a href="https://codecov.io/gh/embedchain/embedchain">
<img src="https://codecov.io/gh/embedchain/embedchain/graph/badge.svg?token=EMRRHZXW1Q" alt="codecov">
</a>
</p>
Mem0 provides a smart, self-improving memory layer for Large Language Models, enabling personalized AI experiences across applications.
<hr />
## Features
## What is Embedchain?
- Persistent memory for users, sessions, and agents
- Self-improving personalization
- Simple API for easy integration
- Cross-platform consistency
Embedchain is an Open Source Framework for personalizing LLM responses. It makes it easy to create and deploy personalized AI apps. At its core, Embedchain follows the design principle of being *"Conventional but Configurable"* to serve both software engineers and machine learning engineers.
## Quick Start
Embedchain streamlines the creation of personalized LLM applications, offering a seamless process for managing various types of unstructured data. It efficiently segments data into manageable chunks, generates relevant embeddings, and stores them in a vector database for optimized retrieval. With a suite of diverse APIs, it enables users to extract contextual information, find precise answers, or engage in interactive chat conversations, all tailored to their own data.
### Installation
## 🔧 Quick install
### Python API
```bash
pip install embedchain
pip install mem0ai
```
## ✨ Live demo
## Usage
Checkout the [Chat with PDF](https://embedchain.ai/demo/chat-pdf) live demo we created using Embedchain. You can find the source code [here](https://github.com/embedchain/embedchain/tree/main/examples/chat-pdf).
## 🔍 Usage
<!-- Demo GIF or Image -->
<p align="center">
<img src="docs/images/cover.gif" width="900px" alt="Embedchain Demo">
</p>
For example, you can create an Elon Musk bot using the following code:
### Instantiate
```python
import os
from embedchain import App
from mem0 import Memory
# Create a bot instance
os.environ["OPENAI_API_KEY"] = "<YOUR_API_KEY>"
app = App()
# Embed online resources
app.add("https://en.wikipedia.org/wiki/Elon_Musk")
app.add("https://www.forbes.com/profile/elon-musk")
# Query the app
app.query("How many companies does Elon Musk run and name those?")
# Answer: Elon Musk currently runs several companies. As of my knowledge, he is the CEO and lead designer of SpaceX, the CEO and product architect of Tesla, Inc., the CEO and founder of Neuralink, and the CEO and founder of The Boring Company. However, please note that this information may change over time, so it's always good to verify the latest updates.
m = Memory()
```
You can also try it in your browser with Google Colab:
If you want to use Qdrant in server mode, use the following method to instantiate.
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/17ON1LPonnXAtLaZEebnOktstB_1cJJmh?usp=sharing)
Run qdrant first:
## 📖 Documentation
Comprehensive guides and API documentation are available to help you get the most out of Embedchain:
- [Introduction](https://docs.embedchain.ai/get-started/introduction#what-is-embedchain)
- [Getting Started](https://docs.embedchain.ai/get-started/quickstart)
- [Examples](https://docs.embedchain.ai/examples)
- [Supported data types](https://docs.embedchain.ai/components/data-sources/overview)
## 🔗 Join the Community
* Connect with fellow developers by joining our [Slack Community](https://embedchain.ai/slack) or [Discord Community](https://embedchain.ai/discord).
* Dive into [GitHub Discussions](https://github.com/embedchain/embedchain/discussions), ask questions, or share your experiences.
## 🤝 Schedule a 1-on-1 Session
Book a [1-on-1 Session](https://cal.com/taranjeetio/ec) with the founders, to discuss any issues, provide feedback, or explore how we can improve Embedchain for you.
## 🌐 Contributing
Contributions are welcome! Please check out the issues on the repository, and feel free to open a pull request.
For more information, please see the [contributing guidelines](CONTRIBUTING.md).
For more reference, please go through [Development Guide](https://docs.embedchain.ai/contribution/dev) and [Documentation Guide](https://docs.embedchain.ai/contribution/docs).
<a href="https://github.com/embedchain/embedchain/graphs/contributors">
<img src="https://contrib.rocks/image?repo=embedchain/embedchain" />
</a>
## Anonymous Telemetry
We collect anonymous usage metrics to enhance our package's quality and user experience. This includes data like feature usage frequency and system info, but never personal details. The data helps us prioritize improvements and ensure compatibility. If you wish to opt-out, set the environment variable `EC_TELEMETRY=false`. We prioritize data security and don't share this data externally.
## Citation
If you utilize this repository, please consider citing it with:
```bash
docker pull qdrant/qdrant
docker run -p 6333:6333 -p 6334:6334 \
-v $(pwd)/qdrant_storage:/qdrant/storage:z \
qdrant/qdrant
```
@misc{embedchain,
author = {Taranjeet Singh, Deshraj Yadav},
title = {Embedchain: The Open Source RAG Framework},
year = {2023},
publisher = {GitHub},
journal = {GitHub repository},
howpublished = {\url{https://github.com/embedchain/embedchain}},
Then, instantiate memory with qdrant server:
```python
from mem0 import Memory
config = {
"vector_store": {
"provider": "qdrant",
"config": {
"host": "localhost",
"port": 6333,
}
},
}
m = Memory.from_config(config)
```
### Store a Memory
```python
m.add("Likes to play cricket over weekend", user_id="alex", metadata={"foo": "bar"})
# Output:
# [
# {
# 'id': 'm1',
# 'event': 'add',
# 'data': 'Likes to play cricket over weekend'
# }
# ]
# Similarly, you can store a memory for an agent
m.add("Agent X is best travel agent in Paris", agent_id="agent-x", metadata={"type": "long-term"})
```
### Retrieve all memories
#### 1. Get all memories
```python
m.get_all()
# Output:
# [
# {
# 'id': 'm1',
# 'text': 'Likes to play cricket over weekend',
# 'metadata': {
# 'data': 'Likes to play cricket over weekend'
# }
# },
# {
# 'id': 'm2',
# 'text': 'Agent X is best travel agent in Paris',
# 'metadata': {
# 'data': 'Agent X is best travel agent in Paris'
# }
# }
# ]
```
#### 2. Get memories for specific user
```python
m.get_all(user_id="alex")
```
#### 3. Get memories for specific agent
```python
m.get_all(agent_id="agent-x")
```
#### 4. Get memories for a user during an agent run
```python
m.get_all(agent_id="agent-x", user_id="alex")
```
### Retrieve a Memory
```python
memory_id = "m1"
m.get(memory_id)
# Output:
# {
# 'id': 'm1',
# 'text': 'Likes to play cricket over weekend',
# 'metadata': {
# 'data': 'Likes to play cricket over weekend'
# }
# }
```
### Search for related memories
```python
m.search(query="What is my name", user_id="alex")
```
### Update a Memory
```python
m.update(memory_id="m1", data="Likes to play tennis")
```
### Get history of a Memory
```python
m.history(memory_id="m1")
# Output:
# [
# {
# 'id': 'h1',
# 'memory_id': 'm1',
# 'prev_value': None,
# 'new_value': 'Likes to play cricket over weekend',
# 'event': 'add',
# 'timestamp': '2024-06-12 21:00:54.466687',
# 'is_deleted': 0
# },
# {
# 'id': 'h2',
# 'memory_id': 'm1',
# 'prev_value': 'Likes to play cricket over weekend',
# 'new_value': 'Likes to play tennis',
# 'event': 'update',
# 'timestamp': '2024-06-12 21:01:17.230943',
# 'is_deleted': 0
# }
# ]
```
### Delete a Memory
#### Delete specific memory
```python
m.delete(memory_id="m1")
```
#### Delete memories for a user or agent
```python
m.delete_all(user_id="alex")
m.delete_all(agent_id="agent-x")
```
#### Delete all Memories
```python
m.reset()
```
## License
[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0)

View File

@@ -1,7 +1,14 @@
# Contributing to embedchain docs
# Mintlify Starter Kit
Click on `Use this template` to copy the Mintlify starter kit. The starter kit contains examples including
### 👩‍💻 Development
- Guide pages
- Navigation
- Customizations
- API Reference pages
- Use of popular components
### Development
Install the [Mintlify CLI](https://www.npmjs.com/package/mintlify) to preview the documentation changes locally. To install, use the following command
@@ -15,9 +22,9 @@ Run the following command at the root of your documentation (where mint.json is)
mintlify dev
```
### 😎 Publishing Changes
### Publishing Changes
Changes will be deployed to production automatically after your PR is merged to the main branch.
Install our GitHub App to automatically propagate changes from your repo to your deployment. Changes will be deployed to production automatically after pushing to the default branch. Find the link to install on your dashboard.
#### Troubleshooting

49
docs/favicon.svg Normal file
View File

@@ -0,0 +1,49 @@
<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M7.95343 21.1394C4.89586 21.1304 2.25471 19.458 0.987296 16.2895C-0.280118 13.121 0.108924 9.16314 1.74363 5.61505C4.8012 5.62409 7.44235 7.29648 8.70976 10.465C9.97718 13.6335 9.58814 17.5914 7.95343 21.1394Z" fill="white"/>
<path d="M7.95343 21.1394C4.89586 21.1304 2.25471 19.458 0.987296 16.2895C-0.280118 13.121 0.108924 9.16314 1.74363 5.61505C4.8012 5.62409 7.44235 7.29648 8.70976 10.465C9.97718 13.6335 9.58814 17.5914 7.95343 21.1394Z" fill="url(#paint0_radial_101_2703)"/>
<path d="M7.95343 21.1394C4.89586 21.1304 2.25471 19.458 0.987296 16.2895C-0.280118 13.121 0.108924 9.16314 1.74363 5.61505C4.8012 5.62409 7.44235 7.29648 8.70976 10.465C9.97718 13.6335 9.58814 17.5914 7.95343 21.1394Z" fill="black" fill-opacity="0.5" style="mix-blend-mode:hard-light"/>
<path d="M7.95343 21.1394C4.89586 21.1304 2.25471 19.458 0.987296 16.2895C-0.280118 13.121 0.108924 9.16314 1.74363 5.61505C4.8012 5.62409 7.44235 7.29648 8.70976 10.465C9.97718 13.6335 9.58814 17.5914 7.95343 21.1394Z" fill="url(#paint1_linear_101_2703)" fill-opacity="0.5" style="mix-blend-mode:hard-light"/>
<path d="M8.68359 10.4755C9.94543 13.63 9.56145 17.5723 7.9354 21.1112C4.89702 21.0957 2.27411 19.4306 1.01347 16.279C-0.248375 13.1245 0.135612 9.18218 1.76165 5.64328C4.80004 5.65883 7.42295 7.32386 8.68359 10.4755Z" stroke="url(#paint2_linear_101_2703)" stroke-opacity="0.05" stroke-width="0.056338"/>
<path d="M7.31038 21.2574C11.3543 20.2215 14.8836 17.3754 16.6285 13.2361C18.3735 9.09671 17.9448 4.58749 15.8598 0.976291C11.8159 2.01214 8.2866 4.85826 6.54167 8.99762C4.79674 13.137 5.2254 17.6462 7.31038 21.2574Z" fill="white"/>
<path d="M7.31038 21.2574C11.3543 20.2215 14.8836 17.3754 16.6285 13.2361C18.3735 9.09671 17.9448 4.58749 15.8598 0.976291C11.8159 2.01214 8.2866 4.85826 6.54167 8.99762C4.79674 13.137 5.2254 17.6462 7.31038 21.2574Z" fill="url(#paint3_radial_101_2703)"/>
<path d="M16.6026 13.2251C14.8642 17.349 11.3512 20.1866 7.32411 21.2248C5.25257 17.624 4.82926 13.1324 6.56764 9.00855C8.30603 4.88472 11.819 2.04706 15.8461 1.00889C17.9176 4.60967 18.3409 9.10131 16.6026 13.2251Z" stroke="url(#paint4_linear_101_2703)" stroke-opacity="0.05" stroke-width="0.056338"/>
<path d="M7.23368 21.2069C9.78906 23.2373 13.2102 23.9506 16.5772 22.8141C19.9441 21.6775 22.5058 18.9445 23.7304 15.6382C21.175 13.6078 17.7538 12.8944 14.3869 14.031C11.0199 15.1676 8.45822 17.9006 7.23368 21.2069Z" fill="white"/>
<path d="M7.23368 21.2069C9.78906 23.2373 13.2102 23.9506 16.5772 22.8141C19.9441 21.6775 22.5058 18.9445 23.7304 15.6382C21.175 13.6078 17.7538 12.8944 14.3869 14.031C11.0199 15.1676 8.45822 17.9006 7.23368 21.2069Z" fill="url(#paint5_radial_101_2703)"/>
<path d="M7.23368 21.2069C9.78906 23.2373 13.2102 23.9506 16.5772 22.8141C19.9441 21.6775 22.5058 18.9445 23.7304 15.6382C21.175 13.6078 17.7538 12.8944 14.3869 14.031C11.0199 15.1676 8.45822 17.9006 7.23368 21.2069Z" fill="black" fill-opacity="0.2" style="mix-blend-mode:hard-light"/>
<path d="M7.23368 21.2069C9.78906 23.2373 13.2102 23.9506 16.5772 22.8141C19.9441 21.6775 22.5058 18.9445 23.7304 15.6382C21.175 13.6078 17.7538 12.8944 14.3869 14.031C11.0199 15.1676 8.45822 17.9006 7.23368 21.2069Z" fill="url(#paint6_linear_101_2703)" fill-opacity="0.5" style="mix-blend-mode:hard-light"/>
<path d="M16.5682 22.7874C13.2176 23.9184 9.81361 23.2124 7.2672 21.1975C8.49194 17.9068 11.0444 15.189 14.3959 14.0577C17.7465 12.9266 21.1504 13.6326 23.6968 15.6476C22.4721 18.9383 19.9196 21.656 16.5682 22.7874Z" stroke="url(#paint7_linear_101_2703)" stroke-opacity="0.05" stroke-width="0.056338"/>
<defs>
<radialGradient id="paint0_radial_101_2703" cx="0" cy="0" r="1" gradientUnits="userSpaceOnUse" gradientTransform="translate(-3.00503 15.023) rotate(-10.029) scale(17.9572 17.784)">
<stop stop-color="#00B0BB"/>
<stop offset="1" stop-color="#00DB65"/>
</radialGradient>
<linearGradient id="paint1_linear_101_2703" x1="7.39036" y1="4.81308" x2="1.62975" y2="18.6894" gradientUnits="userSpaceOnUse">
<stop stop-color="#18E299"/>
<stop offset="1"/>
</linearGradient>
<linearGradient id="paint2_linear_101_2703" x1="7.94816" y1="8.01563" x2="1.7612" y2="18.746" gradientUnits="userSpaceOnUse">
<stop/>
<stop offset="1" stop-opacity="0"/>
</linearGradient>
<radialGradient id="paint3_radial_101_2703" cx="0" cy="0" r="1" gradientUnits="userSpaceOnUse" gradientTransform="translate(8.11404 20.8822) rotate(-75.7542) scale(21.6246 23.7772)">
<stop stop-color="#00BBBB"/>
<stop offset="0.712616" stop-color="#00DB65"/>
</radialGradient>
<linearGradient id="paint4_linear_101_2703" x1="7.60205" y1="5.8709" x2="15.5561" y2="16.3719" gradientUnits="userSpaceOnUse">
<stop/>
<stop offset="1" stop-opacity="0"/>
</linearGradient>
<radialGradient id="paint5_radial_101_2703" cx="0" cy="0" r="1" gradientUnits="userSpaceOnUse" gradientTransform="translate(7.84537 21.5181) rotate(-20.3525) scale(18.5603 17.32)">
<stop stop-color="#00B0BB"/>
<stop offset="1" stop-color="#00DB65"/>
</radialGradient>
<linearGradient id="paint6_linear_101_2703" x1="16.8078" y1="13.0071" x2="10.0409" y2="22.9937" gradientUnits="userSpaceOnUse">
<stop stop-color="#00B1BC"/>
<stop offset="1"/>
</linearGradient>
<linearGradient id="paint7_linear_101_2703" x1="16.8078" y1="13.0071" x2="14.1687" y2="23.841" gradientUnits="userSpaceOnUse">
<stop/>
<stop offset="1" stop-opacity="0"/>
</linearGradient>
</defs>
</svg>

After

Width:  |  Height:  |  Size: 5.3 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 13 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 13 KiB

191
docs/introduction.mdx Normal file
View File

@@ -0,0 +1,191 @@
---
title: Introduction
description: 'Welcome to the Mem0 documentation'
---
Mem0 is the long-term memory for AI Agents.
## Installation
```bash
pip install mem0ai
```
## Usage
### Instantiate
```python
from mem0 import Memory
m = Memory()
```
Mem0 uses Qdrant by default for storing the semantic memories. If you want to use Qdrant in server mode, use the following method to instantiate.
Run qdrant first:
```bash
docker pull qdrant/qdrant
docker run -p 6333:6333 -p 6334:6334 \
-v $(pwd)/qdrant_storage:/qdrant/storage:z \
qdrant/qdrant
```
Then, instantiate memory with qdrant server:
```python
from mem0 import Memory
config = {
"vector_store": {
"provider": "qdrant",
"config": {
"host": "localhost",
"port": 6333,
}
},
}
m = Memory.from_config(config)
```
### Store a Memory
```python
m.add("Likes to play cricket over weekend", user_id="deshraj", metadata={"foo": "bar"})
# Output:
# [
# {
# 'id': 'm1',
# 'event': 'add',
# 'data': 'Likes to play cricket over weekend'
# }
# ]
# Similarly, you can store a memory for an agent
m.add("Agent X is best travel agent in Paris", agent_id="agent-x", metadata={"type": "long-term"})
```
### Retrieve all memories
#### 1. Get all memories
```python
m.get_all()
# Output:
# [
# {
# 'id': 'm1',
# 'text': 'Likes to play cricket over weekend',
# 'metadata': {
# 'data': 'Likes to play cricket over weekend'
# }
# },
# {
# 'id': 'm2',
# 'text': 'Agent X is best travel agent in Paris',
# 'metadata': {
# 'data': 'Agent X is best travel agent in Paris'
# }
# }
# ]
```
#### 2. Get memories for specific user
```python
m.get_all(user_id="deshraj")
```
#### 3. Get memories for specific agent
```python
m.get_all(agent_id="agent-x")
```
#### 4. Get memories for a user during an agent run
```python
m.get_all(agent_id="agent-x", user_id="deshraj")
```
### Retrieve a Memory
```python
memory_id = "m1"
m.get(memory_id)
# Output:
# {
# 'id': 'm1',
# 'text': 'Likes to play cricket over weekend',
# 'metadata': {
# 'data': 'Likes to play cricket over weekend'
# }
# }
```
### Search for related memories
```python
m.search(query="What is my name", user_id="deshraj")
```
### Update a Memory
```python
m.update(memory_id="m1", data="Likes to play tennis")
```
### Get history of a Memory
```python
m.history(memory_id="m1")
# Output:
# [
# {
# 'id': 'h1',
# 'memory_id': 'm1',
# 'prev_value': None,
# 'new_value': 'Likes to play cricket over weekend',
# 'event': 'add',
# 'timestamp': '2024-06-12 21:00:54.466687',
# 'is_deleted': 0
# },
# {
# 'id': 'h2',
# 'memory_id': 'm1',
# 'prev_value': 'Likes to play cricket over weekend',
# 'new_value': 'Likes to play tennis',
# 'event': 'update',
# 'timestamp': '2024-06-12 21:01:17.230943',
# 'is_deleted': 0
# }
# ]
```
### Delete a Memory
```python
m.delete(memory_id="m1")
```
### Delete memories of a user or agent
```python
m.delete_all(user_id="deshraj")
m.delete_all(agent_id="agent-x")
```
### Delete all Memories
```python
m.reset()
```
## Contributing
Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. Please make sure to update tests as appropriate.
## License
[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0)

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 13 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 13 KiB

View File

@@ -1,12 +1,7 @@
{
"$schema": "https://mintlify.com/schema.json",
"name": "Embedchain",
"logo": {
"dark": "/logo/dark-rt.svg",
"light": "/logo/light-rt.svg",
"href": "https://github.com/embedchain/embedchain"
},
"favicon": "/favicon.png",
"name": "Mem0.ai",
"favicon": "/logo/light.svg",
"colors": {
"primary": "#3B2FC9",
"light": "#6673FF",
@@ -16,262 +11,40 @@
"light": "#fff"
}
},
"modeToggle": {
"default": "dark"
"logo": {
"dark": "/logo/dark.svg",
"light": "/logo/light.svg",
"href": "https://github.com/embedchain/embedchain"
},
"openapi": ["/rest-api.json"],
"metadata": {
"og:image": "/images/og.png",
"twitter:site": "@embedchain"
},
"tabs": [
"topbarLinks": [
{
"name": "Examples",
"url": "examples"
},
{
"name": "API Reference",
"url": "api-reference"
"name": "Support",
"url": "mailto:founders@mem0.ai"
}
],
"anchors": [
{
"name": "Talk to founders",
"icon": "calendar",
"url": "https://cal.com/taranjeetio/ec"
}
],
"topbarLinks": [
"name": "Slack",
"icon": "slack",
"url": "https://mem0.ai/slack/"
},
{
"name": "GitHub",
"url": "https://github.com/embedchain/embedchain"
"name": "Discord",
"icon": "discord",
"url": "https://mem0.ai/discord/"
}
],
"topbarCtaButton": {
"name": "Join our slack",
"url": "https://embedchain.ai/slack"
},
"primaryTab": {
"name": "Documentation"
},
"navigation": [
{
"group": "Get Started",
"pages": [
"get-started/quickstart",
"get-started/introduction",
"get-started/faq",
"get-started/full-stack",
{
"group": "🔗 Integrations",
"pages": [
"integration/langsmith",
"integration/chainlit",
"integration/streamlit-mistral",
"integration/openlit",
"integration/helicone"
]
}
"introduction"
]
},
{
"group": "Use cases",
"pages": [
"use-cases/introduction",
"use-cases/chatbots",
"use-cases/question-answering",
"use-cases/semantic-search"
]
},
{
"group": "Components",
"pages": [
"components/introduction",
{
"group": "🗂️ Data sources",
"pages": [
"components/data-sources/overview",
{
"group": "Data types",
"pages": [
"components/data-sources/pdf-file",
"components/data-sources/csv",
"components/data-sources/json",
"components/data-sources/text",
"components/data-sources/directory",
"components/data-sources/web-page",
"components/data-sources/youtube-channel",
"components/data-sources/youtube-video",
"components/data-sources/docs-site",
"components/data-sources/mdx",
"components/data-sources/docx",
"components/data-sources/notion",
"components/data-sources/sitemap",
"components/data-sources/xml",
"components/data-sources/qna",
"components/data-sources/openapi",
"components/data-sources/gmail",
"components/data-sources/github",
"components/data-sources/postgres",
"components/data-sources/mysql",
"components/data-sources/slack",
"components/data-sources/discord",
"components/data-sources/discourse",
"components/data-sources/substack",
"components/data-sources/beehiiv",
"components/data-sources/directory",
"components/data-sources/dropbox",
"components/data-sources/image",
"components/data-sources/custom"
]
},
"components/data-sources/data-type-handling"
]
},
{
"group": "🗄️ Vector databases",
"pages": [
"components/vector-databases/chromadb",
"components/vector-databases/elasticsearch",
"components/vector-databases/pinecone",
"components/vector-databases/opensearch",
"components/vector-databases/qdrant",
"components/vector-databases/weaviate",
"components/vector-databases/zilliz"
]
},
"components/llms",
"components/embedding-models",
"components/evaluation"
]
},
{
"group": "Deployment",
"pages": [
"get-started/deployment",
"deployment/fly_io",
"deployment/modal_com",
"deployment/render_com",
"deployment/railway",
"deployment/streamlit_io",
"deployment/gradio_app",
"deployment/huggingface_spaces"
]
},
{
"group": "Community",
"pages": ["community/connect-with-us"]
},
{
"group": "Examples",
"pages": [
"examples/chat-with-PDF",
"examples/notebooks-and-replits",
{
"group": "REST API Service",
"pages": [
"examples/rest-api/getting-started",
"examples/rest-api/create",
"examples/rest-api/get-all-apps",
"examples/rest-api/add-data",
"examples/rest-api/get-data",
"examples/rest-api/query",
"examples/rest-api/deploy",
"examples/rest-api/delete",
"examples/rest-api/check-status"
]
},
"examples/full_stack",
"examples/openai-assistant",
"examples/opensource-assistant",
"examples/nextjs-assistant",
"examples/slack-AI"
]
},
{
"group": "Chatbots",
"pages": [
"examples/discord_bot",
"examples/slack_bot",
"examples/telegram_bot",
"examples/whatsapp_bot",
"examples/poe_bot"
]
},
{
"group": "Showcase",
"pages": ["examples/showcase"]
},
{
"group": "API Reference",
"pages": [
"api-reference/app/overview",
{
"group": "App methods",
"pages": [
"api-reference/app/add",
"api-reference/app/query",
"api-reference/app/chat",
"api-reference/app/search",
"api-reference/app/get",
"api-reference/app/evaluate",
"api-reference/app/deploy",
"api-reference/app/reset",
"api-reference/app/delete"
]
},
"api-reference/store/openai-assistant",
"api-reference/store/ai-assistants",
"api-reference/advanced/configuration"
]
},
{
"group": "Contributing",
"pages": [
"contribution/guidelines",
"contribution/dev",
"contribution/docs",
"contribution/python"
]
},
{
"group": "Product",
"pages": ["product/release-notes"]
}
],
"footerSocials": {
"website": "https://embedchain.ai",
"github": "https://github.com/embedchain/embedchain",
"slack": "https://embedchain.ai/slack",
"discord": "https://discord.gg/6PzXDgEjG5",
"twitter": "https://twitter.com/embedchain",
"linkedin": "https://www.linkedin.com/company/embedchain"
},
"isWhiteLabeled": true,
"analytics": {
"posthog": {
"apiKey": "phc_PHQDA5KwztijnSojsxJ2c1DuJd52QCzJzT2xnSGvjN2",
"apiHost": "https://app.embedchain.ai/ingest"
},
"ga4": {
"measurementId": "G-4QK7FJE6T3"
}
},
"feedback": {
"suggestEdit": true,
"raiseIssue": true,
"thumbsRating": true
},
"search": {
"prompt": "✨ Search embedchain docs..."
},
"api": {
"baseUrl": "http://localhost:8080"
},
"redirects": [
{
"source": "/changelog/command-line",
"destination": "/get-started/introduction"
}
]
"x": "https://x.com/mem0ai",
"github": "https://github.com/embedchain/embedchain/mem0",
"linkedin": "https://www.linkedin.com/company/mem0/"
}
}

View File

@@ -0,0 +1,4 @@
One of the core principles of software development is DRY (Don't Repeat
Yourself). This is a principle that applies to documentation as
well. If you find yourself repeating the same content in multiple places, you
should consider creating a custom snippet to keep your content in sync.

56
embedchain/Makefile Normal file
View File

@@ -0,0 +1,56 @@
# Developer task runner for the embedchain package. Every target wraps a
# Poetry (or plain Python) command; none of them produces a file named after
# the target.

# Variables
PYTHON := python3
PIP := $(PYTHON) -m pip
PROJECT_NAME := embedchain

# Targets
# All targets are commands rather than files, so each one is declared phony.
# Without this, a file or directory with the same name (e.g. a `build/`
# directory) would make the target look up to date and its recipe would be
# skipped.
# ci_lint and ci_test are kept from the original declaration for
# compatibility; no rule for them appears in this file as shown.
.PHONY: install install_all install_es install_opensearch install_milvus \
        shell py_shell format clean lint build publish test coverage \
        ci_lint ci_test

# Install the project and its default dependencies into the Poetry environment.
install:
	poetry install

# Install every optional extra, then pip-install additional integrations
# inside the Poetry environment.
# TODO: use a more efficient way to install these packages
install_all:
	poetry install --all-extras
	poetry run pip install pinecone-text pinecone-client langchain-anthropic "unstructured[local-inference, all-docs]" ollama langchain_together==0.1.3 \
		langchain_cohere==0.1.5 deepgram-sdk==3.2.7 langchain-huggingface psutil clarifai==10.0.1 flask==2.3.3 twilio==8.5.0 fastapi-poe==0.0.16 discord==2.3.2 \
		slack-sdk==3.21.3 huggingface_hub==0.23.0 gitpython==3.1.38 yt_dlp==2023.11.14 PyGithub==1.59.1 feedparser==6.0.10 newspaper3k==0.2.8 listparser==0.19 \
		modal==0.56.4329 dropbox==11.36.2 boto3==1.34.20 youtube-transcript-api==0.6.1 pytube==15.0.0 beautifulsoup4==4.12.3

# Install with a single vector-store extra.
install_es:
	poetry install --extras elasticsearch

install_opensearch:
	poetry install --extras opensearch

install_milvus:
	poetry install --extras milvus

# Open a shell inside the Poetry virtual environment.
shell:
	poetry shell

# Start a Python interpreter inside the Poetry virtual environment.
py_shell:
	poetry run python

# Reformat the tree with black and isort.
# NOTE(review): these run through $(PYTHON) directly rather than `poetry run`,
# so they use whichever interpreter is on PATH - confirm this is intended.
format:
	$(PYTHON) -m black .
	$(PYTHON) -m isort .

# Remove packaging artefacts.
clean:
	rm -rf dist build *.egg-info

# Lint the tree with ruff.
# NOTE(review): newer ruff releases expect `ruff check .` - confirm against
# the ruff version pinned for this project before changing.
lint:
	poetry run ruff .

# Build the distributable packages.
build:
	poetry build

# Publish the built packages via Poetry.
publish:
	poetry publish

# Run the test suite; an optional `file` variable narrows the run.
# for example: make test file=tests/test_factory.py
test:
	poetry run pytest $(file)

# Run the tests with coverage for $(PROJECT_NAME), writing an XML report.
coverage:
	poetry run pytest --cov=$(PROJECT_NAME) --cov-report=xml

125
embedchain/README.md Normal file
View File

@@ -0,0 +1,125 @@
<p align="center">
<img src="docs/logo/dark.svg" width="400px" alt="Embedchain Logo">
</p>
<p align="center">
<a href="https://pypi.org/project/embedchain/">
<img src="https://img.shields.io/pypi/v/embedchain" alt="PyPI">
</a>
<a href="https://pepy.tech/project/embedchain">
<img src="https://static.pepy.tech/badge/embedchain" alt="Downloads">
</a>
<a href="https://embedchain.ai/slack">
<img src="https://img.shields.io/badge/slack-embedchain-brightgreen.svg?logo=slack" alt="Slack">
</a>
<a href="https://embedchain.ai/discord">
<img src="https://dcbadge.vercel.app/api/server/6PzXDgEjG5?style=flat" alt="Discord">
</a>
<a href="https://twitter.com/embedchain">
<img src="https://img.shields.io/twitter/follow/embedchain" alt="Twitter">
</a>
<a href="https://colab.research.google.com/drive/138lMWhENGeEu7Q1-6lNbNTHGLZXBBz_B?usp=sharing">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab">
</a>
<a href="https://codecov.io/gh/embedchain/embedchain">
<img src="https://codecov.io/gh/embedchain/embedchain/graph/badge.svg?token=EMRRHZXW1Q" alt="codecov">
</a>
</p>
<hr />
## What is Embedchain?
Embedchain is an Open Source Framework for personalizing LLM responses. It makes it easy to create and deploy personalized AI apps. At its core, Embedchain follows the design principle of being *"Conventional but Configurable"* to serve both software engineers and machine learning engineers.
Embedchain streamlines the creation of personalized LLM applications, offering a seamless process for managing various types of unstructured data. It efficiently segments data into manageable chunks, generates relevant embeddings, and stores them in a vector database for optimized retrieval. With a suite of diverse APIs, it enables users to extract contextual information, find precise answers, or engage in interactive chat conversations, all tailored to their own data.
## 🔧 Quick install
### Python API
```bash
pip install embedchain
```
## ✨ Live demo
Check out the [Chat with PDF](https://embedchain.ai/demo/chat-pdf) live demo we created using Embedchain. You can find the source code [here](https://github.com/embedchain/embedchain/tree/main/examples/chat-pdf).
## 🔍 Usage
<!-- Demo GIF or Image -->
<p align="center">
<img src="docs/images/cover.gif" width="900px" alt="Embedchain Demo">
</p>
For example, you can create an Elon Musk bot using the following code:
```python
import os
from embedchain import App
# Create a bot instance
os.environ["OPENAI_API_KEY"] = "<YOUR_API_KEY>"
app = App()
# Embed online resources
app.add("https://en.wikipedia.org/wiki/Elon_Musk")
app.add("https://www.forbes.com/profile/elon-musk")
# Query the app
app.query("How many companies does Elon Musk run and name those?")
# Answer: Elon Musk currently runs several companies. As of my knowledge, he is the CEO and lead designer of SpaceX, the CEO and product architect of Tesla, Inc., the CEO and founder of Neuralink, and the CEO and founder of The Boring Company. However, please note that this information may change over time, so it's always good to verify the latest updates.
```
You can also try it in your browser with Google Colab:
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/17ON1LPonnXAtLaZEebnOktstB_1cJJmh?usp=sharing)
## 📖 Documentation
Comprehensive guides and API documentation are available to help you get the most out of Embedchain:
- [Introduction](https://docs.embedchain.ai/get-started/introduction#what-is-embedchain)
- [Getting Started](https://docs.embedchain.ai/get-started/quickstart)
- [Examples](https://docs.embedchain.ai/examples)
- [Supported data types](https://docs.embedchain.ai/components/data-sources/overview)
## 🔗 Join the Community
* Connect with fellow developers by joining our [Slack Community](https://embedchain.ai/slack) or [Discord Community](https://embedchain.ai/discord).
* Dive into [GitHub Discussions](https://github.com/embedchain/embedchain/discussions), ask questions, or share your experiences.
## 🤝 Schedule a 1-on-1 Session
Book a [1-on-1 Session](https://cal.com/taranjeetio/ec) with the founders to discuss any issues, provide feedback, or explore how we can improve Embedchain for you.
## 🌐 Contributing
Contributions are welcome! Please check out the issues on the repository, and feel free to open a pull request.
For more information, please see the [contributing guidelines](CONTRIBUTING.md).
For further reference, please see the [Development Guide](https://docs.embedchain.ai/contribution/dev) and the [Documentation Guide](https://docs.embedchain.ai/contribution/docs).
<a href="https://github.com/embedchain/embedchain/graphs/contributors">
<img src="https://contrib.rocks/image?repo=embedchain/embedchain" />
</a>
## Anonymous Telemetry
We collect anonymous usage metrics to enhance our package's quality and user experience. This includes data like feature usage frequency and system info, but never personal details. The data helps us prioritize improvements and ensure compatibility. If you wish to opt out, set the environment variable `EC_TELEMETRY=false`. We prioritize data security and don't share this data externally.
## Citation
If you utilize this repository, please consider citing it with:
```
@misc{embedchain,
author = {Taranjeet Singh and Deshraj Yadav},
title = {Embedchain: The Open Source RAG Framework},
year = {2023},
publisher = {GitHub},
journal = {GitHub repository},
howpublished = {\url{https://github.com/embedchain/embedchain}},
}
```

View File

@@ -1,88 +0,0 @@
import os
from alembic import command
from alembic.config import Config
from sqlalchemy import create_engine
from sqlalchemy.engine.base import Engine
from sqlalchemy.orm import Session as SQLAlchemySession
from sqlalchemy.orm import scoped_session, sessionmaker
from .models import Base
class DatabaseManager:
    """Owns the SQLAlchemy engine and the scoped-session factory.

    The database URI is taken from the ``EMBEDCHAIN_DB_URI`` environment
    variable when the instance is constructed. No engine is created until
    :meth:`setup_engine` is called; the session-related methods raise
    ``RuntimeError`` if they are used before that.
    """

    def __init__(self, echo: bool = False):
        """Record configuration without creating an engine.

        Args:
            echo: Forwarded to ``create_engine(echo=...)`` by ``setup_engine``.
        """
        self.database_uri = os.environ.get("EMBEDCHAIN_DB_URI")
        self.echo = echo
        # Both remain None until setup_engine() runs; other methods test for that.
        self.engine: Engine = None
        self._session_factory = None

    def setup_engine(self) -> None:
        """Initializes the database engine and session factory.

        Raises:
            RuntimeError: If no database URI has been configured.
        """
        if not self.database_uri:
            raise RuntimeError("Database URI is not set. Set the EMBEDCHAIN_DB_URI environment variable.")
        connect_args = {}
        if self.database_uri.startswith("sqlite"):
            # SQLite URIs get check_same_thread=False passed to the driver's connect call.
            connect_args["check_same_thread"] = False
        self.engine = create_engine(self.database_uri, echo=self.echo, connect_args=connect_args)
        self._session_factory = scoped_session(sessionmaker(bind=self.engine))
        # Attach the engine to the declarative metadata shared by all models.
        Base.metadata.bind = self.engine

    def init_db(self) -> None:
        """Creates all tables defined in the Base metadata.

        Raises:
            RuntimeError: If ``setup_engine`` has not been called yet.
        """
        if not self.engine:
            raise RuntimeError("Database engine is not initialized. Call setup_engine() first.")
        Base.metadata.create_all(self.engine)

    def get_session(self) -> SQLAlchemySession:
        """Provides a session for database operations.

        Returns:
            The session produced by calling the scoped-session factory.

        Raises:
            RuntimeError: If ``setup_engine`` has not been called yet.
        """
        if not self._session_factory:
            raise RuntimeError("Session factory is not initialized. Call setup_engine() first.")
        return self._session_factory()

    def close_session(self) -> None:
        """Closes the current session; a no-op when no factory exists yet."""
        if self._session_factory:
            self._session_factory.remove()

    def execute_transaction(self, transaction_block):
        """Executes a block of code within a database transaction.

        The session is committed when ``transaction_block`` returns normally and
        rolled back when it (or the commit) raises. The session is always
        released afterwards.

        Args:
            transaction_block: Callable invoked with the session as its only
                argument.

        Returns:
            Whatever ``transaction_block`` returned.

        Raises:
            RuntimeError: If ``setup_engine`` has not been called yet.
            Exception: Any exception from ``transaction_block`` or the commit is
                re-raised unchanged after the rollback.
        """
        session = self.get_session()
        try:
            result = transaction_block(session)
            session.commit()
        except Exception:
            session.rollback()
            # Bare raise re-raises the active exception with its original traceback.
            raise
        finally:
            self.close_session()
        return result
# Singleton pattern to use throughout the application.
# Instantiated at import time, so EMBEDCHAIN_DB_URI is read from the environment
# at that moment; no engine exists until setup_engine() is called.
database_manager = DatabaseManager()
# Convenience functions for backward compatibility and ease of use
def setup_engine(database_uri: str, echo: bool = False) -> None:
    """Configure the shared ``database_manager`` and build its engine.

    Args:
        database_uri: Replaces the URI stored on the shared manager.
        echo: Replaces the manager's ``echo`` setting.
    """
    manager = database_manager
    manager.database_uri, manager.echo = database_uri, echo
    manager.setup_engine()
def alembic_upgrade() -> None:
    """Run the Alembic ``upgrade`` command up to the ``head`` revision.

    The configuration file is ``alembic.ini``, located two directory levels
    above the directory containing this module.
    """
    module_dir = os.path.dirname(__file__)
    ini_path = os.path.join(module_dir, "..", "..", "alembic.ini")
    command.upgrade(Config(ini_path), "head")
def init_db() -> None:
    """Bring the database schema up to date by running the Alembic migrations.

    This delegates entirely to ``alembic_upgrade``; it does not call
    ``Base.metadata.create_all`` the way ``DatabaseManager.init_db`` does.
    """
    alembic_upgrade()
def get_session() -> SQLAlchemySession:
    """Return a session obtained from the shared ``database_manager``.

    Raises:
        RuntimeError: Propagated from the manager when its engine has not been
            set up yet.
    """
    session = database_manager.get_session()
    return session
def execute_transaction(transaction_block):
    """Run ``transaction_block`` in a transaction on the shared ``database_manager``.

    Args:
        transaction_block: Callable that receives the session as its only
            argument.
    """
    manager = database_manager
    manager.execute_transaction(transaction_block)

View File

@@ -1,31 +0,0 @@
import uuid
from sqlalchemy import TIMESTAMP, Column, Integer, String, Text, func
from sqlalchemy.orm import declarative_base
# Declarative base class that the ORM models in this module inherit from.
Base = declarative_base()
# Module-level alias for Base's table metadata.
# NOTE(review): presumably imported by the Alembic environment as its target metadata — confirm.
metadata = Base.metadata
class DataSource(Base):
    """ORM model for rows of the ``ec_data_sources`` table."""

    __tablename__ = "ec_data_sources"
    # Primary key; defaults to a freshly generated UUID4 rendered as a string.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    # app_id, hash and type are each indexed individually.
    app_id = Column(Text, index=True)
    hash = Column(Text, index=True)
    type = Column(Text, index=True)
    value = Column(Text)
    # Stored in the database column "metadata"; the Python attribute is named
    # meta_data because `metadata` is reserved on declarative classes.
    meta_data = Column(Text, name="metadata")
    # Integer flag, 0 by default.
    is_uploaded = Column(Integer, default=0)
class ChatHistory(Base):
    """ORM model for rows of the ``ec_chat_history`` table."""

    __tablename__ = "ec_chat_history"
    # Composite primary key: (app_id, id, session_id).
    app_id = Column(String, primary_key=True)
    id = Column(String, primary_key=True)
    session_id = Column(String, primary_key=True, index=True)
    question = Column(Text)
    answer = Column(Text)
    # Stored in the database column "metadata"; the Python attribute is named
    # meta_data because `metadata` is reserved on declarative classes.
    meta_data = Column(Text, name="metadata")
    # Indexed timestamp; defaults to the SQL current_timestamp function.
    created_at = Column(TIMESTAMP, default=func.current_timestamp(), index=True)

25
embedchain/docs/README.md Normal file
View File

@@ -0,0 +1,25 @@
# Contributing to embedchain docs
### 👩‍💻 Development
Install the [Mintlify CLI](https://www.npmjs.com/package/mintlify) to preview the documentation changes locally. To install, use the following command:
```
npm i -g mintlify
```
Run the following command at the root of your documentation (where `mint.json` is):
```
mintlify dev
```
### 😎 Publishing Changes
Changes will be deployed to production automatically after your PR is merged to the main branch.
#### Troubleshooting
- Mintlify dev isn't running - Run `mintlify install`; it will re-install the dependencies.
- Page loads as a 404 - Make sure you are running in a folder with `mint.json`.

Some files were not shown because too many files have changed in this diff Show More