feat: add embedchain javascript package (#576)

This commit is contained in:
Taranjeet Singh
2023-09-06 17:22:44 -07:00
committed by GitHub
parent f582d70031
commit 3c3d98b9c3
44 changed files with 20073 additions and 0 deletions

View File

@@ -0,0 +1,51 @@
import axios from 'axios';
import { JSDOM } from 'jsdom';
import { cleanString } from '../utils';
import { BaseLoader } from './BaseLoader';
/**
 * Loader that downloads an HTML page and extracts the text of its `<body>`,
 * after blanking out elements that do not carry page content.
 */
class WebPageLoader extends BaseLoader {
  /**
   * Fetches `url`, parses the response as HTML, blanks boilerplate elements
   * and returns the cleaned body text together with its source URL.
   *
   * @param url - Address of the page to download.
   * @returns An array with a single entry: the cleaned text (`content`) and
   *   `metaData` holding the originating `url`.
   * @throws Error with message `Web page content is empty.` when the body has
   *   no text content. Failures of the HTTP request propagate unchanged.
   */
  // eslint-disable-next-line class-methods-use-this
  async loadData(url: string) {
    const response = await axios.get(url);
    const dom = new JSDOM(response.data);
    try {
      const { document } = dom.window;
      // Elements whose text is navigation, chrome or non-textual payload.
      const unwantedTags = [
        'nav',
        'aside',
        'form',
        'header',
        'noscript',
        'svg',
        'canvas',
        'footer',
        'script',
        'style',
      ];
      unwantedTags.forEach((tagName) => {
        // Array.from snapshots the live collection before it is mutated.
        Array.from(document.getElementsByTagName(tagName)).forEach((element) => {
          // Replace with a single space (not '') so neighbouring words stay separated.
          // eslint-disable-next-line no-param-reassign
          element.textContent = ' ';
        });
      });
      const rawText = document.body.textContent;
      if (!rawText) {
        throw new Error('Web page content is empty.');
      }
      return [
        {
          content: cleanString(rawText),
          metaData: { url },
        },
      ];
    } finally {
      // Dispose of the jsdom window on every path so its resources are released.
      dom.window.close();
    }
  }
}
export { WebPageLoader };