Add support for image dataset (#571)

Co-authored-by: Rupesh Bansal <rupeshbansal@Shankars-MacBook-Air.local>
This commit is contained in:
Rupesh Bansal
2023-10-04 09:50:40 +05:30
committed by GitHub
parent 55e9a1cbd6
commit d0af018b8d
19 changed files with 498 additions and 31 deletions

View File

@@ -66,3 +66,6 @@ class BaseChunker(JSONSerializable):
self.data_type = data_type
# TODO: This should be done during initialization. This means it has to be done in the child classes.
def get_word_count(self, documents):
    """Return the total number of words across all ``documents``.

    A word is a maximal run of non-whitespace characters, so empty
    documents contribute zero and repeated, leading or trailing whitespace
    (spaces, tabs, newlines) never produces phantom words.

    :param documents: iterable of strings to count words in.
    :return: the summed word count as an int; 0 for an empty iterable.
    """
    # str.split() with no argument splits on any whitespace run and drops
    # empty strings, unlike split(" ") which yields [""] for an empty string.
    return sum(len(document.split()) for document in documents)