Remove embedchain js (#1408)

This commit is contained in:
Taranjeet Singh
2024-06-10 13:24:56 -07:00
committed by GitHub
parent 52fd3e0dd4
commit 445fed4d3f
44 changed files with 0 additions and 20064 deletions

View File

@@ -1,2 +0,0 @@
node_modules
dist

View File

@@ -1,56 +0,0 @@
{
// Configuration for JavaScript files
"extends": [
"airbnb-base",
"plugin:prettier/recommended"
],
"rules": {
"prettier/prettier": [
"error",
{
"singleQuote": true,
"endOfLine": "auto"
}
]
},
"overrides": [
// Configuration for TypeScript files
{
"files": ["**/*.ts", "**/__tests__/*.test.ts"],
"plugins": [
"@typescript-eslint",
"unused-imports",
"simple-import-sort"
],
"extends": [
"airbnb-typescript",
"plugin:prettier/recommended"
],
"parserOptions": {
"project": "./tsconfig.json"
},
"rules": {
"prettier/prettier": [
"error",
{
"singleQuote": true,
"endOfLine": "auto"
}
],
"@typescript-eslint/comma-dangle": "off", // Avoid conflict rule between Eslint and Prettier
"@typescript-eslint/consistent-type-imports": "error", // Ensure `import type` is used when it's necessary
"import/prefer-default-export": "off", // Named export is easier to refactor automatically
"simple-import-sort/imports": "error", // Import configuration for `eslint-plugin-simple-import-sort`
"simple-import-sort/exports": "error", // Export configuration for `eslint-plugin-simple-import-sort`
"@typescript-eslint/no-unused-vars": "off",
"react/jsx-filename-extension": "off", // Gives error
"unused-imports/no-unused-imports": "error",
"unused-imports/no-unused-vars": [
"error",
{ "argsIgnorePattern": "^_" }
]
}
}
]
}

View File

@@ -1,47 +0,0 @@
name: Node.js Package
on:
release:
types: [created]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
with:
node-version: 16
- run: npm ci
- run: npm test
- run: npm run build
- uses: actions/upload-artifact@v3
with:
name: dist
path: dist
- uses: actions/upload-artifact@v3
with:
name: types
path: types
publish-npm:
needs: build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
with:
node-version: 16
registry-url: https://registry.npmjs.org/
- uses: actions/download-artifact@v3
with:
name: dist
path: dist
- uses: actions/download-artifact@v3
with:
name: types
path: types
- run: npm ci
- run: npm publish
env:
NODE_AUTH_TOKEN: ${{secrets.npm_token}}

View File

@@ -1,138 +0,0 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# Snowpack dependency directory (https://snowpack.dev/)
web_modules/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional stylelint cache
.stylelintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# Next.js build output
.next
out
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# vuepress v2.x temp and cache directory
.temp
.cache
# Docusaurus cache and generated files
.docusaurus
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*
.ideas.md
.todos.md
# Custom
dist
types
build

View File

@@ -1,4 +0,0 @@
#!/bin/sh
. "$(dirname "$0")/_/husky.sh"
# Husky commit-msg hook: validate the commit message file ($1) against
# the commitlint rules configured in commitlint.config.js.
npx --no -- commitlint --edit $1

View File

@@ -1,5 +0,0 @@
#!/bin/sh
. "$(dirname "$0")/_/husky.sh"
# Husky pre-commit hook. Disable concurrent mode so `check-types` runs
# after ESLint in lint-staged (the tasks must run sequentially).
npx lint-staged --concurrent false

View File

@@ -1,8 +0,0 @@
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- family-names: "Singh"
given-names: "Taranjeet"
title: "Embedchain"
date-released: 2023-06-25
url: "https://github.com/embedchain/embedchainjs"

View File

@@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -1,254 +0,0 @@
# embedchainjs
[![Discord](https://dcbadge.vercel.app/api/server/CUU9FPhRNt?style=flat)](https://discord.gg/CUU9FPhRNt)
[![Twitter](https://img.shields.io/twitter/follow/embedchain)](https://twitter.com/embedchain)
[![Substack](https://img.shields.io/badge/Substack-%23006f5c.svg?logo=substack)](https://embedchain.substack.com/)
embedchain is a framework to easily create LLM powered bots over any dataset. embedchainjs is the JavaScript version of embedchain. If you want a Python version, check out [embedchain-python](https://github.com/embedchain/embedchain)
# 🤝 Let's Talk Embedchain!
Schedule a [Feedback Session](https://cal.com/taranjeetio/ec) with Taranjeet, the founder, to discuss any issues, provide feedback, or explore improvements.
# How it works
It abstracts the entire process of loading dataset, chunking it, creating embeddings and then storing in vector database.
You can add a single dataset or multiple datasets using the `.add` and `.addLocal` functions, and then use the `.query` function to find an answer from the added datasets.
If you want to create a Naval Ravikant bot which has 2 of his blog posts, as well as a question and answer pair you supply, all you need to do is add the links to the blog posts and the QnA pair and embedchain will create a bot for you.
```javascript
const dotenv = require("dotenv");
dotenv.config();
const { App } = require("embedchain");
//Run the app commands inside an async function only
async function testApp() {
const navalChatBot = await App();
// Embed Online Resources
await navalChatBot.add("web_page", "https://nav.al/feedback");
await navalChatBot.add("web_page", "https://nav.al/agi");
await navalChatBot.add(
"pdf_file",
"https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Almanack-of-Naval-Ravikant_Final.pdf"
);
// Embed Local Resources
await navalChatBot.addLocal("qna_pair", [
"Who is Naval Ravikant?",
"Naval Ravikant is an Indian-American entrepreneur and investor.",
]);
const result = await navalChatBot.query(
"What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?"
);
console.log(result);
// answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
}
testApp();
```
# Getting Started
## Installation
- First make sure that you have the package installed. If not, then install it using `npm`
```bash
npm install embedchain && npm install -S openai@^3.3.0
```
- Currently, it is only compatible with openai 3.X, not the latest version 4.X. Please make sure to use the right version, otherwise you will see the `ChromaDB` error `TypeError: OpenAIApi.Configuration is not a constructor`
- Make sure that dotenv package is installed and your `OPENAI_API_KEY` in a file called `.env` in the root folder. You can install dotenv by
```js
npm install dotenv
```
- Download and install Docker on your device by visiting [this link](https://www.docker.com/). You will need this to run Chroma vector database on your machine.
- Run the following commands to setup Chroma container in Docker
```bash
git clone https://github.com/chroma-core/chroma.git
cd chroma
docker-compose up -d --build
```
- Once Chroma container has been set up, run it inside Docker
## Usage
- We use OpenAI's embedding model to create embeddings for chunks and ChatGPT API as LLM to get answer given the relevant docs. Make sure that you have an OpenAI account and an API key. If you don't have an API key, you can create one by visiting [this link](https://platform.openai.com/account/api-keys).
- Once you have the API key, set it in an environment variable called `OPENAI_API_KEY`
```js
// Set this inside your .env file
OPENAI_API_KEY = "sk-xxxx";
```
- Load the environment variables inside your .js file using the following commands
```js
const dotenv = require("dotenv");
dotenv.config();
```
- Next import the `App` class from embedchain and use `.add` function to add any dataset.
- Now your app is created. You can use `.query` function to get the answer for any query.
```js
const dotenv = require("dotenv");
dotenv.config();
const { App } = require("embedchain");
async function testApp() {
const navalChatBot = await App();
// Embed Online Resources
await navalChatBot.add("web_page", "https://nav.al/feedback");
await navalChatBot.add("web_page", "https://nav.al/agi");
await navalChatBot.add(
"pdf_file",
"https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Almanack-of-Naval-Ravikant_Final.pdf"
);
// Embed Local Resources
await navalChatBot.addLocal("qna_pair", [
"Who is Naval Ravikant?",
"Naval Ravikant is an Indian-American entrepreneur and investor.",
]);
const result = await navalChatBot.query(
"What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?"
);
console.log(result);
// answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
}
testApp();
```
- If there is any other app instance in your script or app, you can change the import as
```javascript
const { App: EmbedChainApp } = require("embedchain");
// or
const { App: ECApp } = require("embedchain");
```
## Format supported
We support the following formats:
### PDF File
To add any pdf file, use the data_type as `pdf_file`. Eg:
```javascript
await app.add("pdf_file", "a_valid_url_where_pdf_file_can_be_accessed");
```
### Web Page
To add any web page, use the data_type as `web_page`. Eg:
```javascript
await app.add("web_page", "a_valid_web_page_url");
```
### QnA Pair
To supply your own QnA pair, use the data_type as `qna_pair` and enter a tuple. Eg:
```javascript
await app.addLocal("qna_pair", ["Question", "Answer"]);
```
### More Formats coming soon
- If you want to add any other format, please create an [issue](https://github.com/embedchain/embedchainjs/issues) and we will add it to the list of supported formats.
## Testing
Before you consume valuable tokens, you should make sure that the embedding you have done works and that it's receiving the correct document from the database.
For this you can use the `dryRun` method.
Following the example above, add this to your script:
```js
let result = await navalChatBot.dryRun("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?");console.log(result);
'''
Use the following pieces of context to answer the query at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
terms of the unseen. And I think thats critical. That is what humans do uniquely that no other creature, no other computer, no other intelligencebiological or artificialthat we have ever encountered does. And not only do we do it uniquely, but if we were to meet an alien species that also had the power to generate these good explanations, there is no explanation that they could generate that we could not understand. We are maximally capable of understanding. There is no concept out there that is possible in this physical reality that a human being, given sufficient time and resources and
Query: What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?
Helpful Answer:
'''
```
_The embedding is confirmed to work as expected. It returns the right document, even if the question is asked slightly different. No prompt tokens have been consumed._
**The dry run will still consume tokens to embed your query, but it is only ~1/15 of the prompt.**
# How does it work?
Creating a chat bot over any dataset needs the following steps to happen
- load the data
- create meaningful chunks
- create embeddings for each chunk
- store the chunks in vector database
Whenever a user asks any query, following process happens to find the answer for the query
- create the embedding for query
- find similar documents for this query from vector database
- pass similar documents as context to LLM to get the final answer.
The process of loading the dataset and then querying involves multiple steps, and each step has its own nuances.
- How should I chunk the data? What is a meaningful chunk size?
- How should I create embeddings for each chunk? Which embedding model should I use?
- How should I store the chunks in vector database? Which vector database should I use?
- Should I store meta data along with the embeddings?
- How should I find similar documents for a query? Which ranking model should I use?
These questions may be trivial for some but for a lot of us, it needs research, experimentation and time to find out the accurate answers.
embedchain is a framework which takes care of all these nuances and provides a simple interface to create bots over any dataset.
In the first release, we are making it easier for anyone to get a chatbot over any dataset up and running in less than a minute. All you need to do is create an app instance, add the data sets using `.add` function and then use `.query` function to get the relevant answer.
# Team
## Author
- Taranjeet Singh ([@taranjeetio](https://twitter.com/taranjeetio))
## Maintainer
- [cachho](https://github.com/cachho)
- [sahilyadav902](https://github.com/sahilyadav902)
## Citation
If you utilize this repository, please consider citing it with:
```
@misc{embedchain,
author = {Taranjeet Singh},
title = {Embedchain: Framework to easily create LLM powered bots over any dataset},
year = {2023},
publisher = {GitHub},
journal = {GitHub repository},
howpublished = {\url{https://github.com/embedchain/embedchainjs}},
}
```

View File

@@ -1 +0,0 @@
// commitlint configuration: enforce the Conventional Commits message format.
module.exports = { extends: ['@commitlint/config-conventional'] };

View File

@@ -1,66 +0,0 @@
// Unit test for the EmbedChainApp public API. The real implementation is
// replaced by jest mocks, so this verifies only that add/addLocal/query are
// invoked with the expected arguments — no network, OpenAI, or vector-DB
// access happens here.
import { EmbedChainApp } from '../embedchain';
// NOTE: jest.mock() calls are hoisted above imports; referenced variables
// must be prefixed with `mock` for jest to allow them inside the factory.
const mockAdd = jest.fn();
const mockAddLocal = jest.fn();
const mockQuery = jest.fn();
jest.mock('../embedchain', () => {
  return {
    EmbedChainApp: jest.fn().mockImplementation(() => {
      return {
        add: mockAdd,
        addLocal: mockAddLocal,
        query: mockQuery,
      };
    }),
  };
});
describe('Test App', () => {
  beforeEach(() => {
    // Reset call history so assertions only see calls from this test.
    jest.clearAllMocks();
  });
  it('tests the App', async () => {
    mockQuery.mockResolvedValue(
      'Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.'
    );
    // Second constructor arg disables telemetry collection.
    const navalChatBot = await new EmbedChainApp(undefined, false);
    // Embed Online Resources
    await navalChatBot.add('web_page', 'https://nav.al/feedback');
    await navalChatBot.add('web_page', 'https://nav.al/agi');
    await navalChatBot.add(
      'pdf_file',
      'https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Almanack-of-Naval-Ravikant_Final.pdf'
    );
    // Embed Local Resources
    await navalChatBot.addLocal('qna_pair', [
      'Who is Naval Ravikant?',
      'Naval Ravikant is an Indian-American entrepreneur and investor.',
    ]);
    const result = await navalChatBot.query(
      'What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?'
    );
    // Each mock must have received exactly the arguments passed above.
    expect(mockAdd).toHaveBeenCalledWith('web_page', 'https://nav.al/feedback');
    expect(mockAdd).toHaveBeenCalledWith('web_page', 'https://nav.al/agi');
    expect(mockAdd).toHaveBeenCalledWith(
      'pdf_file',
      'https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Almanack-of-Naval-Ravikant_Final.pdf'
    );
    expect(mockAddLocal).toHaveBeenCalledWith('qna_pair', [
      'Who is Naval Ravikant?',
      'Naval Ravikant is an Indian-American entrepreneur and investor.',
    ]);
    expect(mockQuery).toHaveBeenCalledWith(
      'What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?'
    );
    expect(result).toBe(
      'Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.'
    );
  });
});

View File

@@ -1,44 +0,0 @@
import { createHash } from 'crypto';
import type { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import type { BaseLoader } from '../loaders';
import type { Input, LoaderResult } from '../models';
import type { ChunkResult } from '../models/ChunkResult';
/**
 * Base class for all chunkers: loads raw data via a loader, splits each
 * returned document into chunks, and assigns every chunk a
 * content-derived id.
 */
class BaseChunker {
  textSplitter: RecursiveCharacterTextSplitter;

  constructor(textSplitter: RecursiveCharacterTextSplitter) {
    this.textSplitter = textSplitter;
  }

  /**
   * Load `url` with `loader`, split every returned document, and return
   * parallel arrays of chunks, ids, and metadata.
   *
   * Ids are sha256(chunk + metaData.url), so the same chunk from the same
   * source always maps to the same id (enables dedup downstream).
   *
   * @param loader - data loader whose `loadData(url)` yields documents.
   * @param url - remote or local input, passed through to the loader.
   * @returns `{ documents, ids, metadatas }` with matching indices.
   */
  async createChunks(loader: BaseLoader, url: Input): Promise<ChunkResult> {
    const datas: LoaderResult = await loader.loadData(url);
    // Split all documents in parallel, but collect each document's chunks
    // separately: the original implementation pushed into shared arrays
    // from concurrent callbacks, making chunk ordering depend on promise
    // resolution order. Flattening afterwards keeps the output order
    // deterministic (loader order, then chunk order within a document).
    const perDocument = await Promise.all(
      datas.map(async ({ content, metaData }) => {
        const chunks: string[] = await this.textSplitter.splitText(content);
        return chunks.map((chunk) => ({
          id: createHash('sha256')
            .update(chunk + metaData.url)
            .digest('hex'),
          chunk,
          metaData,
        }));
      })
    );
    const documents: ChunkResult['documents'] = [];
    const ids: ChunkResult['ids'] = [];
    const metadatas: ChunkResult['metadatas'] = [];
    perDocument.flat().forEach(({ id, chunk, metaData }) => {
      ids.push(id);
      documents.push(chunk);
      metadatas.push(metaData);
    });
    return {
      documents,
      ids,
      metadatas,
    };
  }
}
export { BaseChunker };

View File

@@ -1,26 +0,0 @@
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { BaseChunker } from './BaseChunker';
/** Shape of the splitter configuration consumed by the chunker. */
interface TextSplitterChunkParams {
  chunkSize: number;
  chunkOverlap: number;
  keepSeparator: boolean;
}

// PDF text tends to be dense prose, so use large 1000-char chunks with
// no overlap and no separator retention.
const TEXT_SPLITTER_CHUNK_PARAMS: TextSplitterChunkParams = {
  chunkSize: 1000,
  chunkOverlap: 0,
  keepSeparator: false,
};

/** Chunker for PDF documents (recursive character splitting). */
class PdfFileChunker extends BaseChunker {
  constructor() {
    super(new RecursiveCharacterTextSplitter(TEXT_SPLITTER_CHUNK_PARAMS));
  }
}

export { PdfFileChunker };

View File

@@ -1,26 +0,0 @@
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { BaseChunker } from './BaseChunker';
/** Shape of the splitter configuration consumed by the chunker. */
interface TextSplitterChunkParams {
  chunkSize: number;
  chunkOverlap: number;
  keepSeparator: boolean;
}

// QnA pairs are short, so use small 300-char chunks with no overlap
// and no separator retention.
const TEXT_SPLITTER_CHUNK_PARAMS: TextSplitterChunkParams = {
  chunkSize: 300,
  chunkOverlap: 0,
  keepSeparator: false,
};

/** Chunker for local question/answer pairs (recursive character splitting). */
class QnaPairChunker extends BaseChunker {
  constructor() {
    super(new RecursiveCharacterTextSplitter(TEXT_SPLITTER_CHUNK_PARAMS));
  }
}

export { QnaPairChunker };

View File

@@ -1,26 +0,0 @@
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { BaseChunker } from './BaseChunker';
/** Shape of the splitter configuration consumed by the chunker. */
interface TextSplitterChunkParams {
  chunkSize: number;
  chunkOverlap: number;
  keepSeparator: boolean;
}

// Web pages sit between PDFs and QnA pairs in density: 500-char chunks,
// no overlap, no separator retention.
const TEXT_SPLITTER_CHUNK_PARAMS: TextSplitterChunkParams = {
  chunkSize: 500,
  chunkOverlap: 0,
  keepSeparator: false,
};

/** Chunker for web pages (recursive character splitting). */
class WebPageChunker extends BaseChunker {
  constructor() {
    super(new RecursiveCharacterTextSplitter(TEXT_SPLITTER_CHUNK_PARAMS));
  }
}

export { WebPageChunker };

View File

@@ -1,6 +0,0 @@
// Barrel file: expose every chunker from a single entry point.
export { BaseChunker } from './BaseChunker';
export { PdfFileChunker } from './PdfFile';
export { QnaPairChunker } from './QnaPair';
export { WebPageChunker } from './WebPage';

View File

@@ -1,317 +0,0 @@
/* eslint-disable max-classes-per-file */
import type { Collection } from 'chromadb';
import type { QueryResponse } from 'chromadb/dist/main/types';
import * as fs from 'fs';
import { Document } from 'langchain/document';
import OpenAI from 'openai';
import * as path from 'path';
import { v4 as uuidv4 } from 'uuid';
import type { BaseChunker } from './chunkers';
import { PdfFileChunker, QnaPairChunker, WebPageChunker } from './chunkers';
import type { BaseLoader } from './loaders';
import { LocalQnaPairLoader, PdfFileLoader, WebPageLoader } from './loaders';
import type {
DataDict,
DataType,
FormattedResult,
Input,
LocalInput,
Metadata,
Method,
RemoteInput,
} from './models';
import { ChromaDB } from './vectordb';
import type { BaseVectorDB } from './vectordb/BaseVectorDb';
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
/**
 * Core embedchain pipeline: loads data from a source, chunks and embeds
 * it into a vector database (Chroma by default), and answers queries by
 * retrieving the closest stored chunk and prompting an OpenAI chat model.
 */
class EmbedChain {
  // Underlying vector-database client (set during setupChroma).
  dbClient: any;
  // TODO: Definitely assign
  collection!: Collection;
  // History of every (dataType, input) pair the user has added.
  userAsks: [DataType, Input][] = [];
  // Resolves once the vector database is connected; await before use.
  initApp: Promise<void>;
  // Whether anonymous usage telemetry is sent.
  collectMetrics: boolean;
  sId: string; // sessionId

  /**
   * @param db Optional vector-db backend; defaults to a local ChromaDB.
   * @param collectMetrics Pass false to disable anonymous telemetry.
   */
  constructor(db?: BaseVectorDB, collectMetrics: boolean = true) {
    if (!db) {
      this.initApp = this.setupChroma();
    } else {
      this.initApp = this.setupOther(db);
    }
    this.collectMetrics = collectMetrics;
    // Send anonymous telemetry
    this.sId = uuidv4();
    // NOTE(review): fire-and-forget promise; errors are caught and
    // logged inside sendTelemetryEvent itself.
    this.sendTelemetryEvent('init');
  }

  // Connects to a local Chroma server and adopts its client/collection.
  async setupChroma(): Promise<void> {
    const db = new ChromaDB();
    await db.initDb;
    this.dbClient = db.client;
    if (db.collection) {
      this.collection = db.collection;
    } else {
      // TODO: Add proper error handling
      console.error('No collection');
    }
  }

  // Waits for a caller-supplied backend to finish initializing.
  // NOTE(review): the client/collection of an unknown backend are never
  // adopted (see commented-out lines), so a custom db is not yet usable.
  async setupOther(db: BaseVectorDB): Promise<void> {
    await db.initDb;
    // TODO: Figure out how we can initialize an unknown database.
    // this.dbClient = db.client;
    // this.collection = db.collection;
    this.userAsks = [];
  }

  // Maps a data type to the loader that fetches that kind of source.
  static getLoader(dataType: DataType) {
    const loaders: { [t in DataType]: BaseLoader } = {
      pdf_file: new PdfFileLoader(),
      web_page: new WebPageLoader(),
      qna_pair: new LocalQnaPairLoader(),
    };
    return loaders[dataType];
  }

  // Maps a data type to the chunker that splits its content.
  static getChunker(dataType: DataType) {
    const chunkers: { [t in DataType]: BaseChunker } = {
      pdf_file: new PdfFileChunker(),
      web_page: new WebPageChunker(),
      qna_pair: new QnaPairChunker(),
    };
    return chunkers[dataType];
  }

  /**
   * Adds data from a remote source (addressed by URL) to the vector db.
   * @param dataType Kind of source ('pdf_file' | 'web_page' | 'qna_pair').
   * @param url Remote address of the data.
   */
  public async add(dataType: DataType, url: RemoteInput) {
    const loader = EmbedChain.getLoader(dataType);
    const chunker = EmbedChain.getChunker(dataType);
    this.userAsks.push([dataType, url]);
    const { documents, countNewChunks } = await this.loadAndEmbed(
      loader,
      chunker,
      url
    );
    if (this.collectMetrics) {
      // Word count is approximated by splitting chunks on single spaces.
      const wordCount = documents.reduce(
        (sum, document) => sum + document.split(' ').length,
        0
      );
      this.sendTelemetryEvent('add', {
        data_type: dataType,
        word_count: wordCount,
        chunks_count: countNewChunks,
      });
    }
  }

  /**
   * Adds in-memory data (e.g. a question/answer pair) to the vector db.
   * @param dataType Kind of source ('qna_pair' for local input).
   * @param content The local data itself.
   */
  public async addLocal(dataType: DataType, content: LocalInput) {
    const loader = EmbedChain.getLoader(dataType);
    const chunker = EmbedChain.getChunker(dataType);
    this.userAsks.push([dataType, content]);
    const { documents, countNewChunks } = await this.loadAndEmbed(
      loader,
      chunker,
      content
    );
    if (this.collectMetrics) {
      const wordCount = documents.reduce(
        (sum, document) => sum + document.split(' ').length,
        0
      );
      this.sendTelemetryEvent('add_local', {
        data_type: dataType,
        word_count: wordCount,
        chunks_count: countNewChunks,
      });
    }
  }

  /**
   * Chunks the source, drops chunks whose ids already exist in the
   * collection, and stores the remainder.
   * @param loader Loader that fetches the raw data.
   * @param chunker Chunker that splits it into embeddable pieces.
   * @param src The source (URL or local input) being ingested.
   * @returns The stored documents/metadatas/ids and how many chunks
   *          were actually new.
   */
  protected async loadAndEmbed(
    loader: any,
    chunker: BaseChunker,
    src: Input
  ): Promise<{
    documents: string[];
    metadatas: Metadata[];
    ids: string[];
    countNewChunks: number;
  }> {
    const embeddingsData = await chunker.createChunks(loader, src);
    let { documents, ids, metadatas } = embeddingsData;
    // Fetch any chunks with these ids that are already stored.
    const existingDocs = await this.collection.get({ ids });
    const existingIds = new Set(existingDocs.ids);
    if (existingIds.size > 0) {
      // Keep only the chunks whose ids are not yet in the collection.
      const dataDict: DataDict = {};
      for (let i = 0; i < ids.length; i += 1) {
        const id = ids[i];
        if (!existingIds.has(id)) {
          dataDict[id] = { doc: documents[i], meta: metadatas[i] };
        }
      }
      if (Object.keys(dataDict).length === 0) {
        console.log(`All data from ${src} already exists in the database.`);
        return { documents: [], metadatas: [], ids: [], countNewChunks: 0 };
      }
      ids = Object.keys(dataDict);
      const dataValues = Object.values(dataDict);
      documents = dataValues.map(({ doc }) => doc);
      metadatas = dataValues.map(({ meta }) => meta);
    }
    // The new-chunk count is derived from collection size before/after.
    const countBeforeAddition = await this.count();
    await this.collection.add({ documents, metadatas, ids });
    const countNewChunks = (await this.count()) - countBeforeAddition;
    console.log(
      `Successfully saved ${src}. New chunks count: ${countNewChunks}`
    );
    return { documents, metadatas, ids, countNewChunks };
  }

  /**
   * Converts a raw Chroma query response into [Document, distance] pairs.
   * NOTE(review): assumes the response contains at least one result set
   * (indexes documents[0]/metadatas[0]) — confirm callers guarantee this.
   */
  static async formatResult(
    results: QueryResponse
  ): Promise<FormattedResult[]> {
    return results.documents[0].map((document: any, index: number) => {
      const metadata = results.metadatas[0][index] || {};
      // TODO: Add proper error handling
      const distance = results.distances ? results.distances[0][index] : null;
      return [new Document({ pageContent: document, metadata }), distance];
    });
  }

  /**
   * Sends the prompt to gpt-3.5-turbo and returns the model's answer.
   * Falls back to a fixed message if the response has no content.
   */
  static async getOpenAiAnswer(prompt: string) {
    const messages: OpenAI.Chat.CreateChatCompletionRequestMessage[] = [
      { role: 'user', content: prompt },
    ];
    const response = await openai.chat.completions.create({
      model: 'gpt-3.5-turbo',
      messages,
      temperature: 0,
      max_tokens: 1000,
      top_p: 1,
    });
    return (
      response.choices[0].message?.content ?? 'Response could not be processed.'
    );
  }

  /**
   * Retrieves the single closest chunk's text for the given query.
   * NOTE(review): throws a TypeError if the collection is empty
   * (resultFormatted[0] would be undefined) — confirm intended.
   */
  protected async retrieveFromDatabase(inputQuery: string) {
    const result = await this.collection.query({
      nResults: 1,
      queryTexts: [inputQuery],
    });
    const resultFormatted = await EmbedChain.formatResult(result);
    const content = resultFormatted[0][0].pageContent;
    return content;
  }

  // Builds the RAG prompt: retrieved context followed by the user query.
  static generatePrompt(inputQuery: string, context: any) {
    const prompt = `Use the following pieces of context to answer the query at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n${context}\nQuery: ${inputQuery}\nHelpful Answer:`;
    return prompt;
  }

  // Thin indirection over the OpenAI call (single LLM backend for now).
  static async getAnswerFromLlm(prompt: string) {
    const answer = await EmbedChain.getOpenAiAnswer(prompt);
    return answer;
  }

  /**
   * Answers a query: retrieve context, build the prompt, ask the LLM.
   * @param inputQuery The user's question.
   * @returns The LLM's answer string.
   */
  public async query(inputQuery: string) {
    const context = await this.retrieveFromDatabase(inputQuery);
    const prompt = EmbedChain.generatePrompt(inputQuery, context);
    const answer = await EmbedChain.getAnswerFromLlm(prompt);
    this.sendTelemetryEvent('query');
    return answer;
  }

  /**
   * Like query(), but returns the prompt that WOULD be sent to the LLM
   * instead of calling it — useful for debugging retrieval.
   */
  public async dryRun(input_query: string) {
    const context = await this.retrieveFromDatabase(input_query);
    const prompt = EmbedChain.generatePrompt(input_query, context);
    return prompt;
  }

  /**
   * Count the number of embeddings.
   * @returns {Promise<number>}: The number of embeddings.
   */
  public count(): Promise<number> {
    return this.collection.count();
  }

  /**
   * POSTs an anonymous telemetry event, retrying up to 3 times.
   * Never throws: failures are logged and swallowed so telemetry can
   * never break the pipeline.
   * NOTE(review): the body is JSON but no Content-Type header is set —
   * confirm the endpoint accepts this.
   */
  protected async sendTelemetryEvent(method: Method, extraMetadata?: object) {
    if (!this.collectMetrics) {
      return;
    }
    const url = 'https://api.embedchain.ai/api/v1/telemetry/';
    // Read package version from filesystem (because it's not in the ts root dir)
    const packageJsonPath = path.join(__dirname, '..', 'package.json');
    const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
    const metadata = {
      s_id: this.sId,
      version: packageJson.version,
      method,
      language: 'js',
      ...extraMetadata,
    };
    const maxRetries = 3;
    // Retry the fetch
    for (let i = 0; i < maxRetries; i += 1) {
      try {
        // eslint-disable-next-line no-await-in-loop
        const response = await fetch(url, {
          method: 'POST',
          body: JSON.stringify({ metadata }),
        });
        if (response.ok) {
          // Break out of the loop if the request was successful
          break;
        } else {
          // Log the unsuccessful response (optional)
          console.error(
            `Telemetry: Attempt ${i + 1} failed with status:`,
            response.status
          );
        }
      } catch (error) {
        // Log the error (optional)
        console.error(`Telemetry: Attempt ${i + 1} failed with error:`, error);
      }
      // If this was the last attempt, throw an error or handle the failure
      if (i === maxRetries - 1) {
        console.error('Telemetry: Max retries reached');
      }
    }
  }
}
// The public EmbedChain app: a thin alias over EmbedChain exposing its
// full interface — add/addLocal to ingest data into the vector store,
// query/dryRun to answer questions via retrieval + LLM, and count for
// the number of stored embeddings.
class EmbedChainApp extends EmbedChain {
  // Intentionally empty: all behavior is inherited from EmbedChain.
}
export { EmbedChainApp };

View File

@@ -1,7 +0,0 @@
import { EmbedChainApp } from './embedchain';
/**
 * Async factory: constructs an EmbedChainApp and waits for its
 * vector-database setup to finish before handing it back.
 */
export const App = async () => {
  const instance = new EmbedChainApp();
  await instance.initApp;
  return instance;
};

View File

@@ -1,5 +0,0 @@
import type { Input, LoaderResult } from '../models';
// Contract for all data loaders: turn a source (URL or in-memory input)
// into a list of { content, metaData } entries ready for chunking.
export abstract class BaseLoader {
  abstract loadData(src: Input): Promise<LoaderResult>;
}

View File

@@ -1,21 +0,0 @@
import type { LoaderResult, QnaPair } from '../models';
import { BaseLoader } from './BaseLoader';
/**
 * Loads an in-memory question/answer pair, formatting it as a single
 * "Q: ...\nA: ..." text entry tagged with a 'local' source url.
 */
class LocalQnaPairLoader extends BaseLoader {
  // eslint-disable-next-line class-methods-use-this
  async loadData(content: QnaPair): Promise<LoaderResult> {
    const [question, answer] = content;
    return [
      {
        content: `Q: ${question}\nA: ${answer}`,
        metaData: { url: 'local' },
      },
    ];
  }
}
export { LocalQnaPairLoader };

View File

@@ -1,58 +0,0 @@
import type { TextContent } from 'pdfjs-dist/types/src/display/api';
import type { LoaderResult, Metadata } from '../models';
import { cleanString } from '../utils';
import { BaseLoader } from './BaseLoader';
const pdfjsLib = require('pdfjs-dist');
// Minimal page shape: the concatenated text content of one PDF page.
interface Page {
  page_content: string;
}
/**
 * Loads a PDF document and converts each page into one cleaned text
 * entry tagged with the source url.
 */
class PdfFileLoader extends BaseLoader {
  /**
   * Extracts the raw text of every page of the PDF at `url`.
   * Pages are fetched concurrently and returned in page order.
   */
  static async getPagesFromPdf(url: string): Promise<Page[]> {
    const loadingTask = pdfjsLib.getDocument(url);
    const doc = await loadingTask.promise;
    const pagePromises = Array.from(
      { length: doc.numPages },
      async (_, index) => {
        const pdfPage = await doc.getPage(index + 1);
        const textContent: TextContent = await pdfPage.getTextContent();
        const joinedText: string = textContent.items
          .map((item) => ('str' in item ? item.str : ''))
          .join(' ');
        return {
          page_content: joinedText,
        };
      }
    );
    return Promise.all(pagePromises);
  }

  // eslint-disable-next-line class-methods-use-this
  async loadData(url: string): Promise<LoaderResult> {
    const pages: Page[] = await PdfFileLoader.getPagesFromPdf(url);
    if (!pages.length) {
      throw new Error('No data found');
    }
    // One cleaned entry per page, all tagged with the source url.
    return pages.map((page) => ({
      content: cleanString(page.page_content),
      metaData: { url },
    }));
  }
}
export { PdfFileLoader };

View File

@@ -1,51 +0,0 @@
import axios from 'axios';
import { JSDOM } from 'jsdom';
import { cleanString } from '../utils';
import { BaseLoader } from './BaseLoader';
/**
 * Fetches a web page, strips boilerplate/navigation elements, and
 * returns the remaining body text as one cleaned entry.
 */
class WebPageLoader extends BaseLoader {
  // eslint-disable-next-line class-methods-use-this
  async loadData(url: string) {
    const response = await axios.get(url);
    const dom = new JSDOM(response.data);
    const { document } = dom.window;
    // Tags whose text is chrome/boilerplate rather than page content.
    const unwantedTags = [
      'nav',
      'aside',
      'form',
      'header',
      'noscript',
      'svg',
      'canvas',
      'footer',
      'script',
      'style',
    ];
    // Blank out the text of every unwanted element before extraction.
    for (const tagName of unwantedTags) {
      const elements = Array.from(document.getElementsByTagName(tagName));
      for (const element of elements) {
        (element as HTMLElement).textContent = ' ';
      }
    }
    const bodyText = document.body.textContent;
    if (!bodyText) {
      throw new Error('Web page content is empty.');
    }
    return [
      {
        content: cleanString(bodyText),
        metaData: { url },
      },
    ];
  }
}
export { WebPageLoader };

View File

@@ -1,6 +0,0 @@
// Barrel module: re-export every loader from a single entry point.
export { BaseLoader } from './BaseLoader';
export { LocalQnaPairLoader } from './LocalQnaPair';
export { PdfFileLoader } from './PdfFile';
export { WebPageLoader } from './WebPage';

View File

@@ -1,7 +0,0 @@
import type { Metadata } from './Metadata';
// Output of a chunker: parallel arrays where index i gives one chunk's
// text (documents[i]), its stable id (ids[i]), and its source metadata
// (metadatas[i]).
export type ChunkResult = {
  documents: string[];
  ids: string[];
  metadatas: Metadata[];
};

View File

@@ -1,10 +0,0 @@
import type { ChunkResult } from './ChunkResult';
// One chunk's text plus its source metadata.
type Data = {
  doc: ChunkResult['documents'][0];
  meta: ChunkResult['metadatas'][0];
};
// Chunk-id -> chunk payload map, used to de-duplicate before insertion.
export type DataDict = {
  [id: string]: Data;
};

View File

@@ -1 +0,0 @@
export type DataType = 'pdf_file' | 'web_page' | 'qna_pair';

View File

@@ -1,3 +0,0 @@
import type { Document } from 'langchain/document';
export type FormattedResult = [Document, number | null];

View File

@@ -1,7 +0,0 @@
import type { QnaPair } from './QnAPair';
// A remote source is addressed by a URL string.
export type RemoteInput = string;
// A local source is an in-memory question/answer pair.
export type LocalInput = QnaPair;
// Anything the pipeline can ingest.
export type Input = RemoteInput | LocalInput;

View File

@@ -1,3 +0,0 @@
import type { Metadata } from './Metadata';
export type LoaderResult = { content: any; metaData: Metadata }[];

View File

@@ -1,3 +0,0 @@
// Provenance attached to every chunk ('local' for in-memory sources).
export type Metadata = {
  url: string;
};

View File

@@ -1 +0,0 @@
export type Method = 'init' | 'query' | 'add' | 'add_local';

View File

@@ -1,4 +0,0 @@
// A user-supplied question/answer pair, in that order.
type Question = string;
type Answer = string;
export type QnaPair = [Question, Answer];

View File

@@ -1,21 +0,0 @@
// Barrel module: re-export all model types from a single entry point.
//
// These symbols are all type-only, so they are re-exported with
// `export type`: plain value re-exports of types break under
// isolatedModules (nothing exists at runtime to re-export) and violate
// this repo's @typescript-eslint/consistent-type-imports ESLint rule.
export type { DataDict } from './DataDict';
export type { DataType } from './DataType';
export type { FormattedResult } from './FormattedResult';
export type { Input, LocalInput, RemoteInput } from './Input';
export type { LoaderResult } from './LoaderResult';
export type { Metadata } from './Metadata';
export type { Method } from './Method';
export type { QnaPair } from './QnAPair';

View File

@@ -1,26 +0,0 @@
/**
 * Normalizes raw extracted text into clean, single-spaced prose.
 *
 * Operations, in order: newlines become spaces, backslashes are
 * removed, hash characters become spaces, each run of a repeated
 * punctuation character collapses to one occurrence (e.g.
 * "!!! hello !!!" -> "! hello !"), and finally the string is trimmed
 * with all whitespace runs reduced to single spaces.
 *
 * The whitespace collapse deliberately runs LAST: in the previous
 * ordering the '#' and '\\' substitutions ran after the collapse, so
 * they could reintroduce multiple spaces ("a # b" came out "a   b").
 *
 * @param {str} text: The text to be cleaned. This is expected to be a string.
 * @returns {str}: The cleaned text after all the cleaning operations have been performed.
 */
export function cleanString(text: string): string {
  // Newlines are treated as ordinary word separators.
  let cleanedText = text.replace(/\n/g, ' ');
  // Strip backslash characters entirely.
  cleanedText = cleanedText.replace(/\\/g, '');
  // Hash characters (e.g. markdown heading markers) become spaces.
  cleanedText = cleanedText.replace(/#/g, ' ');
  // Collapse each run of a repeated non-alphanumeric, non-space
  // character to a single occurrence: "!!! hello !!!" -> "! hello !".
  cleanedText = cleanedText.replace(/([^\w\s])\1*/g, '$1');
  // Trim and reduce all remaining whitespace runs to single spaces.
  return cleanedText.trim().replace(/\s+/g, ' ');
}

View File

@@ -1,14 +0,0 @@
/**
 * Base class for vector-database backends.
 *
 * The constructor kicks off connection setup and exposes it as the
 * `initDb` promise; callers await `initDb` before using the database.
 * Subclasses override getClientAndCollection() with the real setup.
 */
class BaseVectorDB {
  // Resolves once the client and collection are ready.
  initDb: Promise<void>;
  constructor() {
    // Virtual call: dispatches to the subclass override.
    this.initDb = this.getClientAndCollection();
  }
  // eslint-disable-next-line class-methods-use-this
  protected async getClientAndCollection(): Promise<void> {
    throw new Error('getClientAndCollection() method is not implemented');
  }
}
export { BaseVectorDB };

View File

@@ -1,38 +0,0 @@
import type { Collection } from 'chromadb';
import { ChromaClient, OpenAIEmbeddingFunction } from 'chromadb';
import { BaseVectorDB } from './BaseVectorDb';
// Shared OpenAI embedding function for the Chroma collection.
// NOTE(review): falls back to an empty-string API key when
// OPENAI_API_KEY is unset, deferring the failure to the first embed
// call — confirm this is intended.
const embedder = new OpenAIEmbeddingFunction({
  openai_api_key: process.env.OPENAI_API_KEY ?? '',
});
/**
 * Chroma-backed vector store. Connects to a local Chroma server and
 * creates the `embedchain_store` collection if it does not exist yet.
 */
class ChromaDB extends BaseVectorDB {
  client: ChromaClient | undefined;

  collection: Collection | null = null;

  // The previous explicit constructor only called super() and was
  // flagged as useless by the linter; the implicit default constructor
  // behaves identically (BaseVectorDB still starts initDb).

  protected async getClientAndCollection(): Promise<void> {
    this.client = new ChromaClient({ path: 'http://localhost:8000' });
    try {
      this.collection = await this.client.getCollection({
        name: 'embedchain_store',
        embeddingFunction: embedder,
      });
    } catch {
      // getCollection throws when the collection does not exist yet,
      // so fall back to creating it. (The old `if (!this.collection)`
      // guard here was always true: the assignment above throws before
      // assigning, so collection is still null in this branch.)
      this.collection = await this.client.createCollection({
        name: 'embedchain_store',
        embeddingFunction: embedder,
      });
    }
  }
}
export { ChromaDB };

View File

@@ -1,3 +0,0 @@
// Barrel module: re-export the vector-db backend.
export { ChromaDB } from './ChromaDb';

View File

@@ -1,9 +0,0 @@
const { EmbedChainApp } = require("./embedchain/embedchain");

/**
 * Async factory for the embedchain app: constructs it and waits for
 * the vector-database setup to finish before returning it.
 * @returns {Promise<EmbedChainApp>} the fully initialized app.
 */
async function App() {
  const app = new EmbedChainApp();
  // BUG FIX: the class exposes `initApp` (camelCase); the old code
  // awaited the non-existent `init_app`, which resolved immediately,
  // returning the app before its database setup had completed.
  await app.initApp;
  return app;
}
module.exports = { App };

View File

@@ -1,5 +0,0 @@
// Jest configuration: compile TypeScript tests via ts-jest, run them in
// a plain Node environment, and skip generated .d.ts declaration files.
module.exports = {
  preset: 'ts-jest',
  testEnvironment: 'node',
  testPathIgnorePatterns: ['.d.ts'],
};

View File

@@ -1,5 +0,0 @@
// lint-staged configuration (run by husky on pre-commit).
module.exports = {
  // Auto-fix staged JS/TS sources, then re-run eslint as a pure check.
  '*.{js,ts}': ['eslint --fix', 'eslint'],
  // Any staged TS change triggers a full project type-check.
  '**/*.ts?(x)': () => 'npm run check-types',
  '*.json': ['prettier --write'],
};

File diff suppressed because it is too large Load Diff

View File

@@ -1,53 +0,0 @@
{
"name": "embedchain",
"version": "0.0.8",
"description": "embedchain is a framework to easily create LLM powered bots over any dataset",
"main": "dist/index.js",
"types": "types/index.d.ts",
"files": [
"dist",
"types"
],
"scripts": {
"build": "tsc -p tsconfig.build.json --listFiles",
"prepare": "husky install",
"test": "jest",
"check-types": "tsc --noEmit --pretty"
},
"author": "Taranjeet Singh",
"license": "Apache-2.0",
"dependencies": {
"axios": "^1.4.0",
"chromadb": "^1.5.6",
"jsdom": "^22.1.0",
"langchain": "^0.0.136",
"openai": "^4.3.1",
"pdfjs-dist": "^3.8.162",
"uuid": "^9.0.0"
},
"devDependencies": {
"@commitlint/cli": "^17.1.2",
"@commitlint/config-conventional": "^17.1.0",
"@commitlint/cz-commitlint": "^17.1.2",
"@types/jest": "^29.5.1",
"@types/jsdom": "^21.1.1",
"@typescript-eslint/eslint-plugin": "^5.41.0",
"@typescript-eslint/parser": "^5.41.0",
"eslint": "^8.34.0",
"eslint-config-airbnb-base": "^15.0.0",
"eslint-config-airbnb-typescript": "^17.0.0",
"eslint-config-prettier": "^8.5.0",
"eslint-plugin-import": "^2.27.5",
"eslint-plugin-prettier": "^4.2.1",
"eslint-plugin-simple-import-sort": "^8.0.0",
"eslint-plugin-testing-library": "^5.9.1",
"eslint-plugin-unused-imports": "^2.0.0",
"husky": "^8.0.1",
"jest": "^29.5.0",
"lint-staged": "^13.0.3",
"prettier": "^2.7.1",
"ts-jest": "^29.1.0",
"ts-loader": "^9.4.2",
"typescript": "^5.2.2"
}
}

View File

@@ -1,4 +0,0 @@
{
"extends": "./tsconfig.json",
"exclude": ["embedchain/__tests__"]
}

View File

@@ -1,15 +0,0 @@
{
"compilerOptions": {
"target": "es6",
"module": "CommonJS",
"strict": true,
"outDir": "dist",
"rootDir": "embedchain",
"sourceMap": true,
"declaration": true,
"declarationDir": "types",
"esModuleInterop": true
},
"include": ["embedchain/**/*.ts"],
"exclude": ["node_modules", "dist"]
}