Formatting (#2750)
@@ -33,10 +33,7 @@ class RAGManager:
     def generate_response(self, question, context):
         template = Template(PROMPT)
-        prompt = template.render(
-            CONTEXT=context,
-            QUESTION=question
-        )
+        prompt = template.render(CONTEXT=context, QUESTION=question)

         max_retries = 3
         retries = 0
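Note on this hunk: the collapsed call is ordinary Jinja2 rendering. A minimal sketch of the pattern, with the PROMPT text invented here since the real template is defined elsewhere in the file:

    from jinja2 import Template

    # Hypothetical template text; the module's actual PROMPT is not part of this diff.
    PROMPT = "Context:\n{{ CONTEXT }}\n\nQuestion: {{ QUESTION }}"

    template = Template(PROMPT)
    prompt = template.render(CONTEXT="2023-05-01 | Alice: hi", QUESTION="Who spoke first?")
    print(prompt)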
@@ -47,19 +44,21 @@ class RAGManager:
                 response = self.client.chat.completions.create(
                     model=self.model,
                     messages=[
-                        {"role": "system",
-                         "content": "You are a helpful assistant that can answer "
-                                    "questions based on the provided context."
-                                    "If the question involves timing, use the conversation date for reference."
-                                    "Provide the shortest possible answer."
-                                    "Use words directly from the conversation when possible."
-                                    "Avoid using subjects in your answer."},
-                        {"role": "user", "content": prompt}
+                        {
+                            "role": "system",
+                            "content": "You are a helpful assistant that can answer "
+                            "questions based on the provided context."
+                            "If the question involves timing, use the conversation date for reference."
+                            "Provide the shortest possible answer."
+                            "Use words directly from the conversation when possible."
+                            "Avoid using subjects in your answer.",
+                        },
+                        {"role": "user", "content": prompt},
                     ],
-                    temperature=0
+                    temperature=0,
                 )
                 t2 = time.time()
-                return response.choices[0].message.content.strip(), t2-t1
+                return response.choices[0].message.content.strip(), t2 - t1
             except Exception as e:
                 retries += 1
                 if retries > max_retries:
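One behavior worth flagging while reading the system message above: adjacent Python string literals concatenate at compile time with no separator, so the sentences in that prompt run together without spaces, both before and after this reformatting. A two-line demonstration:

    msg = ("Provide the shortest possible answer."
           "Use words directly from the conversation when possible.")
    print(msg)  # Provide the shortest possible answer.Use words directly from the conversation when possible.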
@@ -69,21 +68,16 @@ class RAGManager:
     def clean_chat_history(self, chat_history):
         cleaned_chat_history = ""
         for c in chat_history:
-            cleaned_chat_history += (f"{c['timestamp']} | {c['speaker']}: "
-                                     f"{c['text']}\n")
+            cleaned_chat_history += f"{c['timestamp']} | {c['speaker']}: " f"{c['text']}\n"

         return cleaned_chat_history

     def calculate_embedding(self, document):
-        response = self.client.embeddings.create(
-            model=os.getenv("EMBEDDING_MODEL"),
-            input=document
-        )
+        response = self.client.embeddings.create(model=os.getenv("EMBEDDING_MODEL"), input=document)
         return response.data[0].embedding

     def calculate_similarity(self, embedding1, embedding2):
-        return np.dot(embedding1, embedding2) / (
-            np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
+        return np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))

     def search(self, query, chunks, embeddings, k=1):
         """
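calculate_similarity, reflowed above, is plain cosine similarity: dot(a, b) / (||a|| * ||b||). A self-contained sketch with toy vectors (values invented for illustration):

    import numpy as np

    def cosine_similarity(a, b):
        # 1.0 means same direction, 0.0 orthogonal, -1.0 opposite.
        return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

    a = np.array([1.0, 2.0, 3.0])
    b = np.array([2.0, 4.0, 6.0])   # same direction as a
    print(cosine_similarity(a, b))  # ~1.0 (up to floating-point error)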
@@ -101,10 +95,7 @@ class RAGManager:
         """
         t1 = time.time()
         query_embedding = self.calculate_embedding(query)
-        similarities = [
-            self.calculate_similarity(query_embedding, embedding)
-            for embedding in embeddings
-        ]
+        similarities = [self.calculate_similarity(query_embedding, embedding) for embedding in embeddings]

         # Get indices of top-k most similar chunks
         if k == 1:
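The top-k selection that follows the comment above falls outside this hunk, so here is a hedged sketch of the step the similarity list feeds into, not the file's actual code: NumPy's argsort yields the indices of the k most similar chunks.

    import numpy as np

    similarities = [0.12, 0.87, 0.45, 0.91]  # hypothetical scores, one per chunk
    k = 2
    top_indices = np.argsort(similarities)[::-1][:k]  # indices of the k largest, best first
    print(top_indices)  # [3 1]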
@@ -118,7 +109,7 @@ class RAGManager:
         combined_chunks = "\n<->\n".join([chunks[i] for i in top_indices])

         t2 = time.time()
-        return combined_chunks, t2-t1
+        return combined_chunks, t2 - t1

     def create_chunks(self, chat_history, chunk_size=500):
         """
@@ -139,7 +130,7 @@ class RAGManager:

         # Split into chunks based on token count
         for i in range(0, len(tokens), chunk_size):
-            chunk_tokens = tokens[i:i+chunk_size]
+            chunk_tokens = tokens[i : i + chunk_size]
             chunk = encoding.decode(chunk_tokens)
             chunks.append(chunk)

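The loop above slices a flat token list into fixed-size windows and decodes each back to text. A standalone sketch of the same technique, assuming tiktoken and the cl100k_base encoding (the encoding the file actually uses is not shown in this hunk):

    import tiktoken

    def chunk_text(text, chunk_size=500):
        encoding = tiktoken.get_encoding("cl100k_base")  # assumed encoding
        tokens = encoding.encode(text)
        # Non-overlapping windows of chunk_size tokens, decoded back to strings.
        return [encoding.decode(tokens[i : i + chunk_size])
                for i in range(0, len(tokens), chunk_size)]

    chunks = chunk_text("2023-05-01 | Alice: hi\n" * 200, chunk_size=50)
    print(len(chunks))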
@@ -159,13 +150,9 @@ class RAGManager:
             chat_history = value["conversation"]
             questions = value["question"]

-            chunks, embeddings = self.create_chunks(
-                chat_history, self.chunk_size
-            )
+            chunks, embeddings = self.create_chunks(chat_history, self.chunk_size)

-            for item in tqdm(
-                questions, desc="Answering questions", leave=False
-            ):
+            for item in tqdm(questions, desc="Answering questions", leave=False):
                 question = item["question"]
                 answer = item.get("answer", "")
                 category = item["category"]
@@ -174,22 +161,20 @@ class RAGManager:
                 context = chunks[0]
                 search_time = 0
             else:
-                context, search_time = self.search(
-                    question, chunks, embeddings, k=self.k
-                )
-            response, response_time = self.generate_response(
-                question, context
-            )
+                context, search_time = self.search(question, chunks, embeddings, k=self.k)
+            response, response_time = self.generate_response(question, context)

-            FINAL_RESULTS[key].append({
-                "question": question,
-                "answer": answer,
-                "category": category,
-                "context": context,
-                "response": response,
-                "search_time": search_time,
-                "response_time": response_time,
-            })
+            FINAL_RESULTS[key].append(
+                {
+                    "question": question,
+                    "answer": answer,
+                    "category": category,
+                    "context": context,
+                    "response": response,
+                    "search_time": search_time,
+                    "response_time": response_time,
+                }
+            )

         with open(output_file_path, "w+") as f:
             json.dump(FINAL_RESULTS, f, indent=4)
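For reference, each record appended above keeps the same shape after this change. A minimal sketch of the dump with one invented record (values and file name hypothetical):

    import json

    FINAL_RESULTS = {
        "conv-1": [
            {
                "question": "When did Bob mention the trip?",
                "answer": "March 3",
                "category": 2,
                "context": "2023-03-03 | Bob: booked the trip",
                "response": "March 3",
                "search_time": 0.18,
                "response_time": 0.42,
            }
        ]
    }

    with open("results.json", "w+") as f:
        json.dump(FINAL_RESULTS, f, indent=4)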