From a7e1520d08b232b182a803918b6ce63c78ffadcf Mon Sep 17 00:00:00 2001 From: Deven Patel Date: Sat, 30 Dec 2023 13:39:38 +0530 Subject: [PATCH] [Improvements] update docs (#1079) Co-authored-by: Deven Patel --- docs/api-reference/pipeline/chat.mdx | 27 ++++++++++++++---- docs/api-reference/pipeline/query.mdx | 27 ++++++++++++++---- docs/components/data-sources/pdf-file.mdx | 34 +++++++++++++++++++++-- 3 files changed, 74 insertions(+), 14 deletions(-) diff --git a/docs/api-reference/pipeline/chat.mdx b/docs/api-reference/pipeline/chat.mdx index 6634457f..a8fe32e6 100644 --- a/docs/api-reference/pipeline/chat.mdx +++ b/docs/api-reference/pipeline/chat.mdx @@ -53,24 +53,39 @@ print(sources) # [ # ( # 'Elon Musk PROFILEElon MuskCEO, Tesla$247.1B$2.3B (0.96%)Real Time Net Worthas of 12/7/23 ...', -# {'url': 'https://www.forbes.com/profile/elon-musk', ...} +# { +# 'url': 'https://www.forbes.com/profile/elon-musk', +# 'score': 0.89, +# ... +# } # ), # ( # '74% of the company, which is now called X.Wealth HistoryHOVER TO REVEAL NET WORTH BY YEARForbes ...', -# {'url': 'https://www.forbes.com/profile/elon-musk', ...} +# { +# 'url': 'https://www.forbes.com/profile/elon-musk', +# 'score': 0.81, +# ... +# } # ), # ( # 'founded in 2002, is worth nearly $150 billion after a $750 million tender offer in June 2023 ...', -# {'url': 'https://www.forbes.com/profile/elon-musk', ...} +# { +# 'url': 'https://www.forbes.com/profile/elon-musk', +# 'score': 0.73, +# ... +# } # ) # ] ``` -When `citations=True`, note that the returned `sources` are a list of tuples where each tuple has three elements (in the following order): +When `citations=True`, note that the returned `sources` are a list of tuples where each tuple has two elements (in the following order): 1. source chunk -2. link of the source document -3. document id (used for book keeping purposes) +2. 
dictionary with metadata about the source chunk + - `url`: URL of the source + - `doc_id`: document id (used for bookkeeping purposes) + - `score`: score of the source chunk with respect to the question + - other metadata you might have added at the time of adding the source diff --git a/docs/api-reference/pipeline/query.mdx b/docs/api-reference/pipeline/query.mdx index 2aa63737..f1d94aa8 100644 --- a/docs/api-reference/pipeline/query.mdx +++ b/docs/api-reference/pipeline/query.mdx @@ -53,24 +53,39 @@ print(sources) # [ # ( # 'Elon Musk PROFILEElon MuskCEO, Tesla$247.1B$2.3B (0.96%)Real Time Net Worthas of 12/7/23 ...', -# {'url': 'https://www.forbes.com/profile/elon-musk', ...} +# { +# 'url': 'https://www.forbes.com/profile/elon-musk', +# 'score': 0.89, +# ... +# } # ), # ( # '74% of the company, which is now called X.Wealth HistoryHOVER TO REVEAL NET WORTH BY YEARForbes ...', -# {'url': 'https://www.forbes.com/profile/elon-musk', ...} +# { +# 'url': 'https://www.forbes.com/profile/elon-musk', +# 'score': 0.81, +# ... +# } # ), # ( # 'founded in 2002, is worth nearly $150 billion after a $750 million tender offer in June 2023 ...', -# {'url': 'https://www.forbes.com/profile/elon-musk', ...} +# { +# 'url': 'https://www.forbes.com/profile/elon-musk', +# 'score': 0.73, +# ... +# } # ) # ] ``` -When `citations=True`, note that the returned `sources` are a list of tuples where each tuple has three elements (in the following order): +When `citations=True`, note that the returned `sources` are a list of tuples where each tuple has two elements (in the following order): 1. source chunk -2. link of the source document -3. document id (used for book keeping purposes) +2. 
dictionary with metadata about the source chunk + - `url`: URL of the source + - `doc_id`: document id (used for bookkeeping purposes) + - `score`: score of the source chunk with respect to the question + - other metadata you might have added at the time of adding the source ### Without citations diff --git a/docs/components/data-sources/pdf-file.mdx b/docs/components/data-sources/pdf-file.mdx index fe8b8884..f3392092 100644 --- a/docs/components/data-sources/pdf-file.mdx +++ b/docs/components/data-sources/pdf-file.mdx @@ -10,8 +10,38 @@ from embedchain import App app = App() app.add('https://arxiv.org/pdf/1706.03762.pdf', data_type='pdf_file') -app.query("What is the paper 'attention is all you need' about?") -# Answer: The paper "Attention Is All You Need" proposes a new network architecture called the Transformer, which is based solely on attention mechanisms. It suggests moving away from complex recurrent or convolutional neural networks and instead using attention mechanisms to connect the encoder and decoder in sequence transduction models. +app.query("What is the paper 'attention is all you need' about?", citations=True) +# Answer: The paper "Attention Is All You Need" proposes a new network architecture called the Transformer, which is based solely on attention mechanisms. It suggests that complex recurrent or convolutional neural networks can be replaced with a simpler architecture that connects the encoder and decoder through attention. The paper discusses how this approach can improve sequence transduction models, such as neural machine translation. +# Contexts: +# [ +# ( +# 'Provided proper attribution is ...', +# { +# 'page': 0, +# 'url': 'https://arxiv.org/pdf/1706.03762.pdf', +# 'score': 0.3676220203221626, +# ... +# } +# ), +# ( +# 'Attention Visualizations Input ...', +# { +# 'page': 12, +# 'url': 'https://arxiv.org/pdf/1706.03762.pdf', +# 'score': 0.41679039679873736, +# ... 
+# } +# ), +# ( +# 'sequence learning ...', +# { +# 'page': 10, +# 'url': 'https://arxiv.org/pdf/1706.03762.pdf', +# 'score': 0.4188303600897153, +# ... +# } +# ) +# ] ``` Note that we do not support password protected pdfs. \ No newline at end of file