feat: add embedchain javascript package (#576)
This commit is contained in:
26
embedchain-js/embedchain/chunkers/PdfFile.ts
Normal file
26
embedchain-js/embedchain/chunkers/PdfFile.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
||||
|
||||
import { BaseChunker } from './BaseChunker';
|
||||
|
||||
/**
 * Shape of the options object this module hands to the
 * `RecursiveCharacterTextSplitter` constructor.
 *
 * NOTE(review): the precise meaning of each option is defined by langchain's
 * text splitter, not by this file — consult its documentation before tuning.
 */
interface TextSplitterChunkParams {
  /** Chunk size setting forwarded to the splitter. */
  chunkSize: number;
  /** Chunk overlap setting forwarded to the splitter. */
  chunkOverlap: number;
  /** Separator-retention flag forwarded to the splitter. */
  keepSeparator: boolean;
}
|
||||
|
||||
/**
 * Splitter settings used for PDF content: a chunk size of 1000, no overlap,
 * and separators not kept. Passed as-is to the text splitter built by the
 * chunker in this module.
 */
const TEXT_SPLITTER_CHUNK_PARAMS: TextSplitterChunkParams = {
  chunkSize: 1000,
  chunkOverlap: 0,
  keepSeparator: false,
};
|
||||
|
||||
/**
 * Chunker for PDF files.
 *
 * Takes no constructor arguments: it builds a `RecursiveCharacterTextSplitter`
 * from `TEXT_SPLITTER_CHUNK_PARAMS` and passes that splitter to the
 * `BaseChunker` constructor. All other behavior is inherited from
 * `BaseChunker`.
 */
class PdfFileChunker extends BaseChunker {
  constructor() {
    // The splitter configuration is fixed for PDFs, so callers supply nothing.
    const textSplitter = new RecursiveCharacterTextSplitter(
      TEXT_SPLITTER_CHUNK_PARAMS
    );
    super(textSplitter);
  }
}
|
||||
|
||||
export { PdfFileChunker };
|
||||
Reference in New Issue
Block a user