feat: add embedchain javascript package (#576)

This commit is contained in:
Taranjeet Singh
2023-09-06 17:22:44 -07:00
committed by GitHub
parent f582d70031
commit 3c3d98b9c3
44 changed files with 20073 additions and 0 deletions

View File

@@ -0,0 +1,44 @@
import { createHash } from 'crypto';
import type { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import type { BaseLoader } from '../loaders';
import type { Input, LoaderResult } from '../models';
import type { ChunkResult } from '../models/ChunkResult';
/**
 * Base class for chunkers: loads data through a loader, splits each loaded
 * item's content with the configured text splitter, and returns every chunk
 * together with an id and the metadata of the item it came from.
 */
class BaseChunker {
  /** Splitter used to break each loaded item's content into chunks. */
  textSplitter: RecursiveCharacterTextSplitter;

  /**
   * @param textSplitter - Splitter applied to the content of every loaded item.
   */
  constructor(textSplitter: RecursiveCharacterTextSplitter) {
    this.textSplitter = textSplitter;
  }

  /**
   * Loads data for `url` via `loader` and splits it into chunks.
   *
   * All loaded items are split concurrently, but the result is assembled in
   * the order the loader returned the items (and, within an item, in the
   * order the splitter returned the chunks), so the output does not depend on
   * which `splitText` call happens to finish first.
   *
   * @param loader - Loader whose `loadData` produces the items to chunk.
   * @param url - Source passed through unchanged to `loader.loadData`.
   * @returns `documents`, `ids` and `metadatas` as parallel arrays: index `i`
   *   of each array describes the same chunk.
   * @throws Rejects if `loader.loadData` or any `splitText` call rejects.
   */
  async createChunks(loader: BaseLoader, url: Input): Promise<ChunkResult> {
    const datas: LoaderResult = await loader.loadData(url);

    // Promise.all resolves to results in input order regardless of completion
    // order, which keeps the flattened output deterministic.
    const chunkGroups = await Promise.all(
      datas.map(async ({ content, metaData }) => {
        const chunks: string[] = await this.textSplitter.splitText(content);
        return chunks.map((chunk) => ({
          // The id is derived from the chunk text plus the item's url, so the
          // same text from the same url always maps to the same id.
          id: createHash('sha256')
            .update(chunk + metaData.url)
            .digest('hex'),
          document: chunk,
          metadata: metaData,
        }));
      })
    );

    const documents: ChunkResult['documents'] = [];
    const ids: ChunkResult['ids'] = [];
    const metadatas: ChunkResult['metadatas'] = [];
    for (const group of chunkGroups) {
      for (const { id, document, metadata } of group) {
        ids.push(id);
        documents.push(document);
        metadatas.push(metadata);
      }
    }

    return {
      documents,
      ids,
      metadatas,
    };
  }
}
export { BaseChunker };

View File

@@ -0,0 +1,26 @@
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { BaseChunker } from './BaseChunker';
/** Shape of the options object this module passes to its text splitter. */
type TextSplitterChunkParams = {
  /** Chunk size handed to the splitter. */
  chunkSize: number;
  /** Chunk overlap handed to the splitter. */
  chunkOverlap: number;
  /** Value of the splitter's `keepSeparator` option. */
  keepSeparator: boolean;
};

/**
 * Splitter settings used by the chunker defined in this module; they are
 * passed as-is to the `RecursiveCharacterTextSplitter` constructor.
 */
const TEXT_SPLITTER_CHUNK_PARAMS: TextSplitterChunkParams = {
  chunkSize: 1000,
  chunkOverlap: 0,
  keepSeparator: false,
};
/**
 * `BaseChunker` preconfigured with a `RecursiveCharacterTextSplitter` built
 * from this module's `TEXT_SPLITTER_CHUNK_PARAMS`.
 */
class PdfFileChunker extends BaseChunker {
  /** Takes no arguments; the splitter configuration is fixed by this module. */
  constructor() {
    super(new RecursiveCharacterTextSplitter(TEXT_SPLITTER_CHUNK_PARAMS));
  }
}

export { PdfFileChunker };

View File

@@ -0,0 +1,26 @@
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { BaseChunker } from './BaseChunker';
/** Shape of the options object this module passes to its text splitter. */
type TextSplitterChunkParams = {
  /** Chunk size handed to the splitter. */
  chunkSize: number;
  /** Chunk overlap handed to the splitter. */
  chunkOverlap: number;
  /** Value of the splitter's `keepSeparator` option. */
  keepSeparator: boolean;
};

/**
 * Splitter settings used by the chunker defined in this module; they are
 * passed as-is to the `RecursiveCharacterTextSplitter` constructor.
 */
const TEXT_SPLITTER_CHUNK_PARAMS: TextSplitterChunkParams = {
  chunkSize: 300,
  chunkOverlap: 0,
  keepSeparator: false,
};
/**
 * `BaseChunker` preconfigured with a `RecursiveCharacterTextSplitter` built
 * from this module's `TEXT_SPLITTER_CHUNK_PARAMS`.
 */
class QnaPairChunker extends BaseChunker {
  /** Takes no arguments; the splitter configuration is fixed by this module. */
  constructor() {
    super(new RecursiveCharacterTextSplitter(TEXT_SPLITTER_CHUNK_PARAMS));
  }
}

export { QnaPairChunker };

View File

@@ -0,0 +1,26 @@
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { BaseChunker } from './BaseChunker';
/** Shape of the options object this module passes to its text splitter. */
type TextSplitterChunkParams = {
  /** Chunk size handed to the splitter. */
  chunkSize: number;
  /** Chunk overlap handed to the splitter. */
  chunkOverlap: number;
  /** Value of the splitter's `keepSeparator` option. */
  keepSeparator: boolean;
};

/**
 * Splitter settings used by the chunker defined in this module; they are
 * passed as-is to the `RecursiveCharacterTextSplitter` constructor.
 */
const TEXT_SPLITTER_CHUNK_PARAMS: TextSplitterChunkParams = {
  chunkSize: 500,
  chunkOverlap: 0,
  keepSeparator: false,
};
/**
 * `BaseChunker` preconfigured with a `RecursiveCharacterTextSplitter` built
 * from this module's `TEXT_SPLITTER_CHUNK_PARAMS`.
 */
class WebPageChunker extends BaseChunker {
  /** Takes no arguments; the splitter configuration is fixed by this module. */
  constructor() {
    super(new RecursiveCharacterTextSplitter(TEXT_SPLITTER_CHUNK_PARAMS));
  }
}

export { WebPageChunker };

View File

@@ -0,0 +1,6 @@
import { BaseChunker } from './BaseChunker';
import { PdfFileChunker } from './PdfFile';
import { QnaPairChunker } from './QnaPair';
import { WebPageChunker } from './WebPage';
// Public surface of the chunkers module: the base class plus the chunkers
// imported above.
export {
  BaseChunker,
  PdfFileChunker,
  QnaPairChunker,
  WebPageChunker,
};