Discord loader (#976)
This commit is contained in:
28
docs/data-sources/discord.mdx
Normal file
28
docs/data-sources/discord.mdx
Normal file
@@ -0,0 +1,28 @@
|
||||
---
|
||||
title: "💬 Discord"
|
||||
---
|
||||
|
||||
To add the messages from any Discord channel to your app, pass the `channel_id` as the source and set the `data_type` to `discord`.
|
||||
|
||||
<Note>
|
||||
This loader requires a Discord bot token with permission to read messages.
|
||||
To obtain the token, follow the instructions provided in this tutorial:
|
||||
<a href="https://www.writebots.com/discord-bot-token/">How to Get a Discord Bot Token?</a>.
|
||||
</Note>
|
||||
|
||||
```python
|
||||
import os
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
# add your discord "BOT" token
|
||||
os.environ["DISCORD_TOKEN"] = "xxx"
|
||||
|
||||
app = App()
|
||||
|
||||
app.add("1177296711023075338", data_type="discord")
|
||||
|
||||
response = app.query("What is Joe saying about Elon Musk?")
|
||||
|
||||
print(response)
|
||||
# Answer: Joe is saying "Elon Musk is a genius".
|
||||
```
|
||||
@@ -24,6 +24,7 @@ Embedchain comes with built-in support for various data sources. We handle the c
|
||||
<Card title="🐬 MySQL" href="/data-sources/mysql"></Card>
|
||||
<Card title="🤖 Slack" href="/data-sources/slack"></Card>
|
||||
<Card title="🗨️ Discourse" href="/data-sources/discourse"></Card>
|
||||
<Card title="💬 Discord" href="/data-sources/discord"></Card>
|
||||
</CardGroup>
|
||||
|
||||
<br/ >
|
||||
|
||||
@@ -89,7 +89,8 @@
|
||||
"data-sources/openapi",
|
||||
"data-sources/youtube-video",
|
||||
"data-sources/discourse",
|
||||
"data-sources/substack"
|
||||
"data-sources/substack",
|
||||
"data-sources/discord"
|
||||
]
|
||||
},
|
||||
"data-sources/data-type-handling"
|
||||
|
||||
@@ -66,6 +66,7 @@ class DataFormatter(JSONSerializable):
|
||||
DataType.SUBSTACK: "embedchain.loaders.substack.SubstackLoader",
|
||||
DataType.GITHUB: "embedchain.loaders.github.GithubLoader",
|
||||
DataType.YOUTUBE_CHANNEL: "embedchain.loaders.youtube_channel.YoutubeChannelLoader",
|
||||
DataType.DISCORD: "embedchain.loaders.discord.DiscordLoader",
|
||||
}
|
||||
|
||||
custom_loaders = set(
|
||||
@@ -118,6 +119,7 @@ class DataFormatter(JSONSerializable):
|
||||
DataType.SUBSTACK: "embedchain.chunkers.substack.SubstackChunker",
|
||||
DataType.GITHUB: "embedchain.chunkers.common_chunker.CommonChunker",
|
||||
DataType.YOUTUBE_CHANNEL: "embedchain.chunkers.common_chunker.CommonChunker",
|
||||
DataType.DISCORD: "embedchain.chunkers.common_chunker.CommonChunker",
|
||||
}
|
||||
|
||||
if data_type in chunker_classes:
|
||||
|
||||
150
embedchain/loaders/discord.py
Normal file
150
embedchain/loaders/discord.py
Normal file
@@ -0,0 +1,150 @@
|
||||
import logging
|
||||
import os
|
||||
import hashlib
|
||||
|
||||
from embedchain.helpers.json_serializable import register_deserializable
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
|
||||
|
||||
@register_deserializable
class DiscordLoader(BaseLoader):
    """Load the message history of a Discord text channel.

    The bot token is read from the ``DISCORD_TOKEN`` environment variable
    when the loader is constructed.
    """

    def __init__(self):
        """Read the bot token from the environment.

        Raises:
            ValueError: If ``DISCORD_TOKEN`` is unset or empty.
        """
        token = os.environ.get("DISCORD_TOKEN")
        if not token:
            raise ValueError("DISCORD_TOKEN is not set")

        self.token = token

    @staticmethod
    def _format_attachment(attachment):
        """Return a plain-dict snapshot of one message attachment."""
        return {
            "id": attachment.id,
            "filename": attachment.filename,
            "size": attachment.size,
            "url": attachment.url,
            "proxy_url": attachment.proxy_url,
            "height": attachment.height,
            "width": attachment.width,
        }

    @staticmethod
    def _format_embed(embed):
        """Return a plain-dict snapshot of one message embed."""
        return {
            "title": embed.title,
            "type": embed.type,
            "description": embed.description,
            "url": embed.url,
            # The timestamp is optional on an embed; calling isoformat() on a
            # missing one would raise and abort the whole channel load.
            "timestamp": embed.timestamp.isoformat() if embed.timestamp else None,
            "color": embed.color,
            "footer": {
                "text": embed.footer.text,
                "icon_url": embed.footer.icon_url,
                "proxy_icon_url": embed.footer.proxy_icon_url,
            },
            "image": {
                "url": embed.image.url,
                "proxy_url": embed.image.proxy_url,
                "height": embed.image.height,
                "width": embed.image.width,
            },
            "thumbnail": {
                "url": embed.thumbnail.url,
                "proxy_url": embed.thumbnail.proxy_url,
                "height": embed.thumbnail.height,
                "width": embed.thumbnail.width,
            },
            "video": {
                "url": embed.video.url,
                "height": embed.video.height,
                "width": embed.video.width,
            },
            "provider": {
                "name": embed.provider.name,
                "url": embed.provider.url,
            },
            "author": {
                "name": embed.author.name,
                "url": embed.author.url,
                "icon_url": embed.author.icon_url,
                "proxy_icon_url": embed.author.proxy_icon_url,
            },
            "fields": [
                {
                    "name": field.name,
                    "value": field.value,
                    "inline": field.inline,
                }
                for field in embed.fields
            ],
        }

    @staticmethod
    def _format_message(message):
        """Convert a Discord message object into a nested dict of plain fields.

        Args:
            message: Message object yielded by a channel or thread history.

        Returns:
            A dict with the message id, content, author, creation time,
            attachments and embeds.
        """
        return {
            "message_id": message.id,
            "content": message.content,
            "author": {
                "id": message.author.id,
                "name": message.author.name,
                "discriminator": message.author.discriminator,
            },
            "created_at": message.created_at.isoformat(),
            "attachments": [
                DiscordLoader._format_attachment(attachment) for attachment in message.attachments
            ],
            "embeds": [DiscordLoader._format_embed(embed) for embed in message.embeds],
        }

    def load_data(self, channel_id: str):
        """Load data from a Discord Channel ID.

        Logs in with the stored bot token, reads the full history of the
        channel (and of any thread in ``channel.threads`` whose id matches a
        message id), then disconnects.

        Args:
            channel_id: Id of the text channel to read.

        Returns:
            A dict with a ``doc_id`` (SHA-256 of the stringified messages plus
            the channel id) and a single-item ``data`` list holding the
            stringified messages and their metadata.

        Note:
            Errors raised while reading the channel (including an unknown or
            non-text channel) are logged, not raised; the result then contains
            whatever messages were collected before the failure.
        """
        import discord

        messages = []

        class DiscordClient(discord.Client):
            async def on_ready(self) -> None:
                logging.info("Logged on as %s!", self.user)
                try:
                    channel = self.get_channel(int(channel_id))
                    if not isinstance(channel, discord.TextChannel):
                        raise ValueError(
                            f"Channel {channel_id} is not a text channel. Only text channels are supported for now."
                        )

                    # Index threads by id so each message can be matched to
                    # the thread it started (assumes a thread shares the id of
                    # its starter message -- confirm against the Discord docs).
                    threads = {thread.id: thread for thread in channel.threads}

                    async for message in channel.history(limit=None):
                        messages.append(DiscordLoader._format_message(message))
                        thread = threads.get(message.id)
                        if thread is not None:
                            async for thread_message in thread.history(limit=None):
                                messages.append(DiscordLoader._format_message(thread_message))

                except Exception as e:
                    logging.error(e)
                finally:
                    # Always disconnect so client.run() below returns.
                    await self.close()

        intents = discord.Intents.default()
        intents.message_content = True
        client = DiscordClient(intents=intents)
        client.run(self.token)

        # Normalise to str so an integer id does not break the hash input.
        channel_key = str(channel_id)

        meta_data = {
            "url": channel_key,
        }

        content = str(messages)

        doc_id = hashlib.sha256((content + channel_key).encode()).hexdigest()

        return {
            "doc_id": doc_id,
            "data": [
                {
                    "content": content,
                    "meta_data": meta_data,
                }
            ],
        }
|
||||
@@ -36,6 +36,7 @@ class IndirectDataType(Enum):
|
||||
SUBSTACK = "substack"
|
||||
GITHUB = "github"
|
||||
YOUTUBE_CHANNEL = "youtube_channel"
|
||||
DISCORD = "discord"
|
||||
|
||||
|
||||
class SpecialDataType(Enum):
|
||||
@@ -71,3 +72,4 @@ class DataType(Enum):
|
||||
SUBSTACK = IndirectDataType.SUBSTACK.value
|
||||
GITHUB = IndirectDataType.GITHUB.value
|
||||
YOUTUBE_CHANNEL = IndirectDataType.YOUTUBE_CHANNEL.value
|
||||
DISCORD = IndirectDataType.DISCORD.value
|
||||
|
||||
Reference in New Issue
Block a user