Files
t6_mem0/embedchain-js/embedchain/chunkers/PdfFile.ts
2023-09-07 05:52:44 +05:30

27 lines
580 B
TypeScript

import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { BaseChunker } from './BaseChunker';
/**
 * Shape of the option bag this module hands to
 * `RecursiveCharacterTextSplitter`. The field names mirror the splitter's own
 * option names; their exact semantics are defined by langchain.
 */
type TextSplitterChunkParams = {
  /** Value forwarded as the splitter's `chunkSize` option. */
  chunkSize: number;
  /** Value forwarded as the splitter's `chunkOverlap` option. */
  chunkOverlap: number;
  /** Value forwarded as the splitter's `keepSeparator` option. */
  keepSeparator: boolean;
};
/**
 * Fixed splitter options used by the PDF chunker defined in this module.
 * The object is passed as-is to the `RecursiveCharacterTextSplitter`
 * constructor.
 */
const TEXT_SPLITTER_CHUNK_PARAMS: TextSplitterChunkParams = {
  // Chunk size setting handed to the splitter.
  chunkSize: 1000,
  // Zero overlap is requested between chunks.
  chunkOverlap: 0,
  // Separator retention is switched off.
  keepSeparator: false,
};
/**
 * `BaseChunker` subclass for the PDF-file chunker module.
 *
 * It adds no behavior of its own: the constructor only supplies the base
 * class with a `RecursiveCharacterTextSplitter` configured from
 * `TEXT_SPLITTER_CHUNK_PARAMS`. All chunking logic is whatever `BaseChunker`
 * implements with that splitter.
 */
class PdfFileChunker extends BaseChunker {
  /** Takes no arguments; the splitter configuration is fixed by this module. */
  constructor() {
    super(new RecursiveCharacterTextSplitter(TEXT_SPLITTER_CHUNK_PARAMS));
  }
}
export { PdfFileChunker };