Formatting (#2750)

Author: Dev Khant
Date: 2025-05-22 01:17:29 +05:30
Committed by: GitHub
Parent commit: dff91154a7
Commit: d85fcda037
71 changed files with 1391 additions and 1823 deletions
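The hunks below apply a uniform auto-formatting pass to the evaluation scripts: string literals switch to double quotes, multi-line calls and literals gain trailing commas, slice bounds built from expressions get PEP 8 spacing, and short call sites are collapsed onto a single line under a wider line-length limit. A minimal sketch of those conventions follows, assuming a formatter such as ruff or black with a roughly 120-character line length (the commit does not name the tool, and the helper names here are hypothetical):

```python
import json

# Hypothetical snippet illustrating the conventions applied throughout this diff:
# double-quoted strings, trailing commas, spaced slice bounds, single-line short calls.

BATCH_SIZE = 2


def load_messages(path):
    # Double-quoted file mode instead of 'r'
    with open(path, "r") as f:
        return json.load(f)


def batch(messages, batch_size=BATCH_SIZE):
    # Spaces around the slice colon when the bounds are expressions: [i : i + batch_size]
    return [messages[i : i + batch_size] for i in range(0, len(messages), batch_size)]


def build_payload(question, answer, category):
    # Multi-line literals keep a trailing comma after the last element
    return {
        "question": question,
        "answer": answer,
        "category": category,
    }


if __name__ == "__main__":
    print(build_payload("Where did Alice travel?", "Paris", 1))
    print(batch(["a", "b", "c", "d", "e"]))
```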


@@ -28,14 +28,12 @@ def get_answer(question, speaker_1_user_id, speaker_1_memories, speaker_2_user_i
speaker_1_user_id=speaker_1_user_id,
speaker_1_memories=speaker_1_memories,
speaker_2_user_id=speaker_2_user_id,
speaker_2_memories=speaker_2_memories
speaker_2_memories=speaker_2_memories,
)
t1 = time.time()
response = client.chat.completions.create(
model=os.getenv("MODEL"),
messages=[{"role": "system", "content": prompt}],
temperature=0.0
model=os.getenv("MODEL"), messages=[{"role": "system", "content": prompt}], temperature=0.0
)
t2 = time.time()
return response.choices[0].message.content, t2 - t1
@@ -59,7 +57,9 @@ def prompt(state):
class LangMem:
def __init__(self,):
def __init__(
self,
):
self.store = InMemoryStore(
index={
"dims": 1536,
@@ -80,18 +80,12 @@ class LangMem:
)
def add_memory(self, message, config):
return self.agent.invoke(
{"messages": [{"role": "user", "content": message}]},
config=config
)
return self.agent.invoke({"messages": [{"role": "user", "content": message}]}, config=config)
def search_memory(self, query, config):
try:
t1 = time.time()
response = self.agent.invoke(
{"messages": [{"role": "user", "content": query}]},
config=config
)
response = self.agent.invoke({"messages": [{"role": "user", "content": query}]}, config=config)
t2 = time.time()
return response["messages"][-1].content, t2 - t1
except Exception as e:
@@ -102,7 +96,7 @@ class LangMem:
class LangMemManager:
def __init__(self, dataset_path):
self.dataset_path = dataset_path
with open(self.dataset_path, 'r') as f:
with open(self.dataset_path, "r") as f:
self.data = json.load(f)
def process_all_conversations(self, output_file_path):
@@ -123,7 +117,7 @@ class LangMemManager:
# Identify speakers
for conv in chat_history:
speakers.add(conv['speaker'])
speakers.add(conv["speaker"])
if len(speakers) != 2:
raise ValueError(f"Expected 2 speakers, got {len(speakers)}")
@@ -134,50 +128,52 @@ class LangMemManager:
# Add memories for each message
for conv in tqdm(chat_history, desc=f"Processing messages {key}", leave=False):
message = f"{conv['timestamp']} | {conv['speaker']}: {conv['text']}"
if conv['speaker'] == speaker1:
if conv["speaker"] == speaker1:
agent1.add_memory(message, config)
elif conv['speaker'] == speaker2:
elif conv["speaker"] == speaker2:
agent2.add_memory(message, config)
else:
raise ValueError(f"Expected speaker1 or speaker2, got {conv['speaker']}")
# Process questions
for q in tqdm(questions, desc=f"Processing questions {key}", leave=False):
category = q['category']
category = q["category"]
if int(category) == 5:
continue
answer = q['answer']
question = q['question']
answer = q["answer"]
question = q["question"]
response1, speaker1_memory_time = agent1.search_memory(question, config)
response2, speaker2_memory_time = agent2.search_memory(question, config)
generated_answer, response_time = get_answer(
question, speaker1, response1, speaker2, response2
)
generated_answer, response_time = get_answer(question, speaker1, response1, speaker2, response2)
result[key].append({
"question": question,
"answer": answer,
"response1": response1,
"response2": response2,
"category": category,
"speaker1_memory_time": speaker1_memory_time,
"speaker2_memory_time": speaker2_memory_time,
"response_time": response_time,
'response': generated_answer
})
result[key].append(
{
"question": question,
"answer": answer,
"response1": response1,
"response2": response2,
"category": category,
"speaker1_memory_time": speaker1_memory_time,
"speaker2_memory_time": speaker2_memory_time,
"response_time": response_time,
"response": generated_answer,
}
)
return result
# Use multiprocessing to process conversations in parallel
with mp.Pool(processes=10) as pool:
results = list(tqdm(
pool.imap(process_conversation, list(self.data.items())),
total=len(self.data),
desc="Processing conversations"
))
results = list(
tqdm(
pool.imap(process_conversation, list(self.data.items())),
total=len(self.data),
desc="Processing conversations",
)
)
# Combine results from all workers
for result in results:
@@ -185,5 +181,5 @@ class LangMemManager:
OUTPUT[key].extend(items)
# Save final results
with open(output_file_path, 'w') as f:
with open(output_file_path, "w") as f:
json.dump(OUTPUT, f, indent=4)


@@ -13,7 +13,7 @@ load_dotenv()
# Update custom instructions
custom_instructions ="""
custom_instructions = """
Generate personal memories that follow these guidelines:
1. Each memory should be self-contained with complete context, including:
@@ -47,7 +47,7 @@ class MemoryADD:
self.mem0_client = MemoryClient(
api_key=os.getenv("MEM0_API_KEY"),
org_id=os.getenv("MEM0_ORGANIZATION_ID"),
project_id=os.getenv("MEM0_PROJECT_ID")
project_id=os.getenv("MEM0_PROJECT_ID"),
)
self.mem0_client.update_project(custom_instructions=custom_instructions)
@@ -59,15 +59,16 @@ class MemoryADD:
self.load_data()
def load_data(self):
with open(self.data_path, 'r') as f:
with open(self.data_path, "r") as f:
self.data = json.load(f)
return self.data
def add_memory(self, user_id, message, metadata, retries=3):
for attempt in range(retries):
try:
_ = self.mem0_client.add(message, user_id=user_id, version="v2",
metadata=metadata, enable_graph=self.is_graph)
_ = self.mem0_client.add(
message, user_id=user_id, version="v2", metadata=metadata, enable_graph=self.is_graph
)
return
except Exception as e:
if attempt < retries - 1:
@@ -78,13 +79,13 @@ class MemoryADD:
def add_memories_for_speaker(self, speaker, messages, timestamp, desc):
for i in tqdm(range(0, len(messages), self.batch_size), desc=desc):
batch_messages = messages[i:i+self.batch_size]
batch_messages = messages[i : i + self.batch_size]
self.add_memory(speaker, batch_messages, metadata={"timestamp": timestamp})
def process_conversation(self, item, idx):
conversation = item['conversation']
speaker_a = conversation['speaker_a']
speaker_b = conversation['speaker_b']
conversation = item["conversation"]
speaker_a = conversation["speaker_a"]
speaker_b = conversation["speaker_b"]
speaker_a_user_id = f"{speaker_a}_{idx}"
speaker_b_user_id = f"{speaker_b}_{idx}"
@@ -94,7 +95,7 @@ class MemoryADD:
self.mem0_client.delete_all(user_id=speaker_b_user_id)
for key in conversation.keys():
if key in ['speaker_a', 'speaker_b'] or "date" in key or "timestamp" in key:
if key in ["speaker_a", "speaker_b"] or "date" in key or "timestamp" in key:
continue
date_time_key = key + "_date_time"
@@ -104,10 +105,10 @@ class MemoryADD:
messages = []
messages_reverse = []
for chat in chats:
if chat['speaker'] == speaker_a:
if chat["speaker"] == speaker_a:
messages.append({"role": "user", "content": f"{speaker_a}: {chat['text']}"})
messages_reverse.append({"role": "assistant", "content": f"{speaker_a}: {chat['text']}"})
elif chat['speaker'] == speaker_b:
elif chat["speaker"] == speaker_b:
messages.append({"role": "assistant", "content": f"{speaker_b}: {chat['text']}"})
messages_reverse.append({"role": "user", "content": f"{speaker_b}: {chat['text']}"})
else:
@@ -116,11 +117,11 @@ class MemoryADD:
# add memories for the two users on different threads
thread_a = threading.Thread(
target=self.add_memories_for_speaker,
args=(speaker_a_user_id, messages, timestamp, "Adding Memories for Speaker A")
args=(speaker_a_user_id, messages, timestamp, "Adding Memories for Speaker A"),
)
thread_b = threading.Thread(
target=self.add_memories_for_speaker,
args=(speaker_b_user_id, messages_reverse, timestamp, "Adding Memories for Speaker B")
args=(speaker_b_user_id, messages_reverse, timestamp, "Adding Memories for Speaker B"),
)
thread_a.start()
@@ -134,10 +135,7 @@ class MemoryADD:
if not self.data:
raise ValueError("No data loaded. Please set data_path and call load_data() first.")
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = [
executor.submit(self.process_conversation, item, idx)
for idx, item in enumerate(self.data)
]
futures = [executor.submit(self.process_conversation, item, idx) for idx, item in enumerate(self.data)]
for future in futures:
future.result()
future.result()


@@ -16,12 +16,11 @@ load_dotenv()
class MemorySearch:
def __init__(self, output_path='results.json', top_k=10, filter_memories=False, is_graph=False):
def __init__(self, output_path="results.json", top_k=10, filter_memories=False, is_graph=False):
self.mem0_client = MemoryClient(
api_key=os.getenv("MEM0_API_KEY"),
org_id=os.getenv("MEM0_ORGANIZATION_ID"),
project_id=os.getenv("MEM0_PROJECT_ID")
project_id=os.getenv("MEM0_PROJECT_ID"),
)
self.top_k = top_k
self.openai_client = OpenAI()
@@ -42,11 +41,18 @@ class MemorySearch:
try:
if self.is_graph:
print("Searching with graph")
memories = self.mem0_client.search(query, user_id=user_id, top_k=self.top_k,
filter_memories=self.filter_memories, enable_graph=True, output_format='v1.1')
memories = self.mem0_client.search(
query,
user_id=user_id,
top_k=self.top_k,
filter_memories=self.filter_memories,
enable_graph=True,
output_format="v1.1",
)
else:
memories = self.mem0_client.search(query, user_id=user_id, top_k=self.top_k,
filter_memories=self.filter_memories)
memories = self.mem0_client.search(
query, user_id=user_id, top_k=self.top_k, filter_memories=self.filter_memories
)
break
except Exception as e:
print("Retrying...")
@@ -57,64 +63,86 @@ class MemorySearch:
end_time = time.time()
if not self.is_graph:
semantic_memories = [{'memory': memory['memory'],
'timestamp': memory['metadata']['timestamp'],
'score': round(memory['score'], 2)}
for memory in memories]
semantic_memories = [
{
"memory": memory["memory"],
"timestamp": memory["metadata"]["timestamp"],
"score": round(memory["score"], 2),
}
for memory in memories
]
graph_memories = None
else:
semantic_memories = [{'memory': memory['memory'],
'timestamp': memory['metadata']['timestamp'],
'score': round(memory['score'], 2)} for memory in memories['results']]
graph_memories = [{"source": relation['source'], "relationship": relation['relationship'], "target": relation['target']} for relation in memories['relations']]
semantic_memories = [
{
"memory": memory["memory"],
"timestamp": memory["metadata"]["timestamp"],
"score": round(memory["score"], 2),
}
for memory in memories["results"]
]
graph_memories = [
{"source": relation["source"], "relationship": relation["relationship"], "target": relation["target"]}
for relation in memories["relations"]
]
return semantic_memories, graph_memories, end_time - start_time
def answer_question(self, speaker_1_user_id, speaker_2_user_id, question, answer, category):
speaker_1_memories, speaker_1_graph_memories, speaker_1_memory_time = self.search_memory(speaker_1_user_id, question)
speaker_2_memories, speaker_2_graph_memories, speaker_2_memory_time = self.search_memory(speaker_2_user_id, question)
speaker_1_memories, speaker_1_graph_memories, speaker_1_memory_time = self.search_memory(
speaker_1_user_id, question
)
speaker_2_memories, speaker_2_graph_memories, speaker_2_memory_time = self.search_memory(
speaker_2_user_id, question
)
search_1_memory = [f"{item['timestamp']}: {item['memory']}"
for item in speaker_1_memories]
search_2_memory = [f"{item['timestamp']}: {item['memory']}"
for item in speaker_2_memories]
search_1_memory = [f"{item['timestamp']}: {item['memory']}" for item in speaker_1_memories]
search_2_memory = [f"{item['timestamp']}: {item['memory']}" for item in speaker_2_memories]
template = Template(self.ANSWER_PROMPT)
answer_prompt = template.render(
speaker_1_user_id=speaker_1_user_id.split('_')[0],
speaker_2_user_id=speaker_2_user_id.split('_')[0],
speaker_1_user_id=speaker_1_user_id.split("_")[0],
speaker_2_user_id=speaker_2_user_id.split("_")[0],
speaker_1_memories=json.dumps(search_1_memory, indent=4),
speaker_2_memories=json.dumps(search_2_memory, indent=4),
speaker_1_graph_memories=json.dumps(speaker_1_graph_memories, indent=4),
speaker_2_graph_memories=json.dumps(speaker_2_graph_memories, indent=4),
question=question
question=question,
)
t1 = time.time()
response = self.openai_client.chat.completions.create(
model=os.getenv("MODEL"),
messages=[
{"role": "system", "content": answer_prompt}
],
temperature=0.0
model=os.getenv("MODEL"), messages=[{"role": "system", "content": answer_prompt}], temperature=0.0
)
t2 = time.time()
response_time = t2 - t1
return response.choices[0].message.content, speaker_1_memories, speaker_2_memories, speaker_1_memory_time, speaker_2_memory_time, speaker_1_graph_memories, speaker_2_graph_memories, response_time
return (
response.choices[0].message.content,
speaker_1_memories,
speaker_2_memories,
speaker_1_memory_time,
speaker_2_memory_time,
speaker_1_graph_memories,
speaker_2_graph_memories,
response_time,
)
def process_question(self, val, speaker_a_user_id, speaker_b_user_id):
question = val.get('question', '')
answer = val.get('answer', '')
category = val.get('category', -1)
evidence = val.get('evidence', [])
adversarial_answer = val.get('adversarial_answer', '')
question = val.get("question", "")
answer = val.get("answer", "")
category = val.get("category", -1)
evidence = val.get("evidence", [])
adversarial_answer = val.get("adversarial_answer", "")
response, speaker_1_memories, speaker_2_memories, speaker_1_memory_time, speaker_2_memory_time, speaker_1_graph_memories, speaker_2_graph_memories, response_time = self.answer_question(
speaker_a_user_id,
speaker_b_user_id,
question,
answer,
category
)
(
response,
speaker_1_memories,
speaker_2_memories,
speaker_1_memory_time,
speaker_2_memory_time,
speaker_1_graph_memories,
speaker_2_graph_memories,
response_time,
) = self.answer_question(speaker_a_user_id, speaker_b_user_id, question, answer, category)
result = {
"question": question,
@@ -125,67 +153,63 @@ class MemorySearch:
"adversarial_answer": adversarial_answer,
"speaker_1_memories": speaker_1_memories,
"speaker_2_memories": speaker_2_memories,
'num_speaker_1_memories': len(speaker_1_memories),
'num_speaker_2_memories': len(speaker_2_memories),
'speaker_1_memory_time': speaker_1_memory_time,
'speaker_2_memory_time': speaker_2_memory_time,
"num_speaker_1_memories": len(speaker_1_memories),
"num_speaker_2_memories": len(speaker_2_memories),
"speaker_1_memory_time": speaker_1_memory_time,
"speaker_2_memory_time": speaker_2_memory_time,
"speaker_1_graph_memories": speaker_1_graph_memories,
"speaker_2_graph_memories": speaker_2_graph_memories,
"response_time": response_time
"response_time": response_time,
}
# Save results after each question is processed
with open(self.output_path, 'w') as f:
with open(self.output_path, "w") as f:
json.dump(self.results, f, indent=4)
return result
def process_data_file(self, file_path):
with open(file_path, 'r') as f:
with open(file_path, "r") as f:
data = json.load(f)
for idx, item in tqdm(enumerate(data), total=len(data), desc="Processing conversations"):
qa = item['qa']
conversation = item['conversation']
speaker_a = conversation['speaker_a']
speaker_b = conversation['speaker_b']
qa = item["qa"]
conversation = item["conversation"]
speaker_a = conversation["speaker_a"]
speaker_b = conversation["speaker_b"]
speaker_a_user_id = f"{speaker_a}_{idx}"
speaker_b_user_id = f"{speaker_b}_{idx}"
for question_item in tqdm(qa, total=len(qa), desc=f"Processing questions for conversation {idx}", leave=False):
result = self.process_question(
question_item,
speaker_a_user_id,
speaker_b_user_id
)
for question_item in tqdm(
qa, total=len(qa), desc=f"Processing questions for conversation {idx}", leave=False
):
result = self.process_question(question_item, speaker_a_user_id, speaker_b_user_id)
self.results[idx].append(result)
# Save results after each question is processed
with open(self.output_path, 'w') as f:
with open(self.output_path, "w") as f:
json.dump(self.results, f, indent=4)
# Final save at the end
with open(self.output_path, 'w') as f:
with open(self.output_path, "w") as f:
json.dump(self.results, f, indent=4)
def process_questions_parallel(self, qa_list, speaker_a_user_id, speaker_b_user_id, max_workers=1):
def process_single_question(val):
result = self.process_question(val, speaker_a_user_id, speaker_b_user_id)
# Save results after each question is processed
with open(self.output_path, 'w') as f:
with open(self.output_path, "w") as f:
json.dump(self.results, f, indent=4)
return result
with ThreadPoolExecutor(max_workers=max_workers) as executor:
results = list(tqdm(
executor.map(process_single_question, qa_list),
total=len(qa_list),
desc="Answering Questions"
))
results = list(
tqdm(executor.map(process_single_question, qa_list), total=len(qa_list), desc="Answering Questions")
)
# Final save at the end
with open(self.output_path, 'w') as f:
with open(self.output_path, "w") as f:
json.dump(self.results, f, indent=4)
return results


@@ -59,23 +59,19 @@ class OpenAIPredict:
self.results = defaultdict(list)
def search_memory(self, idx):
with open(f'memories/{idx}.txt', 'r') as file:
with open(f"memories/{idx}.txt", "r") as file:
memories = file.read()
return memories, 0
def process_question(self, val, idx):
question = val.get('question', '')
answer = val.get('answer', '')
category = val.get('category', -1)
evidence = val.get('evidence', [])
adversarial_answer = val.get('adversarial_answer', '')
question = val.get("question", "")
answer = val.get("answer", "")
category = val.get("category", -1)
evidence = val.get("evidence", [])
adversarial_answer = val.get("adversarial_answer", "")
response, search_memory_time, response_time, context = self.answer_question(
idx,
question
)
response, search_memory_time, response_time, context = self.answer_question(idx, question)
result = {
"question": question,
@@ -86,7 +82,7 @@ class OpenAIPredict:
"adversarial_answer": adversarial_answer,
"search_memory_time": search_memory_time,
"response_time": response_time,
"context": context
"context": context,
}
return result
@@ -95,43 +91,35 @@ class OpenAIPredict:
memories, search_memory_time = self.search_memory(idx)
template = Template(ANSWER_PROMPT)
answer_prompt = template.render(
memories=memories,
question=question
)
answer_prompt = template.render(memories=memories, question=question)
t1 = time.time()
response = self.openai_client.chat.completions.create(
model=os.getenv("MODEL"),
messages=[
{"role": "system", "content": answer_prompt}
],
temperature=0.0
model=os.getenv("MODEL"), messages=[{"role": "system", "content": answer_prompt}], temperature=0.0
)
t2 = time.time()
response_time = t2 - t1
return response.choices[0].message.content, search_memory_time, response_time, memories
def process_data_file(self, file_path, output_file_path):
with open(file_path, 'r') as f:
with open(file_path, "r") as f:
data = json.load(f)
for idx, item in tqdm(enumerate(data), total=len(data), desc="Processing conversations"):
qa = item['qa']
qa = item["qa"]
for question_item in tqdm(qa, total=len(qa), desc=f"Processing questions for conversation {idx}", leave=False):
result = self.process_question(
question_item,
idx
)
for question_item in tqdm(
qa, total=len(qa), desc=f"Processing questions for conversation {idx}", leave=False
):
result = self.process_question(question_item, idx)
self.results[idx].append(result)
# Save results after each question is processed
with open(output_file_path, 'w') as f:
with open(output_file_path, "w") as f:
json.dump(self.results, f, indent=4)
# Final save at the end
with open(output_file_path, 'w') as f:
with open(output_file_path, "w") as f:
json.dump(self.results, f, indent=4)
@@ -141,4 +129,3 @@ if __name__ == "__main__":
args = parser.parse_args()
openai_predict = OpenAIPredict()
openai_predict.process_data_file("../../dataset/locomo10.json", args.output_file_path)


@@ -33,10 +33,7 @@ class RAGManager:
def generate_response(self, question, context):
template = Template(PROMPT)
prompt = template.render(
CONTEXT=context,
QUESTION=question
)
prompt = template.render(CONTEXT=context, QUESTION=question)
max_retries = 3
retries = 0
@@ -47,19 +44,21 @@ class RAGManager:
response = self.client.chat.completions.create(
model=self.model,
messages=[
{"role": "system",
"content": "You are a helpful assistant that can answer "
"questions based on the provided context."
"If the question involves timing, use the conversation date for reference."
"Provide the shortest possible answer."
"Use words directly from the conversation when possible."
"Avoid using subjects in your answer."},
{"role": "user", "content": prompt}
{
"role": "system",
"content": "You are a helpful assistant that can answer "
"questions based on the provided context."
"If the question involves timing, use the conversation date for reference."
"Provide the shortest possible answer."
"Use words directly from the conversation when possible."
"Avoid using subjects in your answer.",
},
{"role": "user", "content": prompt},
],
temperature=0
temperature=0,
)
t2 = time.time()
return response.choices[0].message.content.strip(), t2-t1
return response.choices[0].message.content.strip(), t2 - t1
except Exception as e:
retries += 1
if retries > max_retries:
@@ -69,21 +68,16 @@ class RAGManager:
def clean_chat_history(self, chat_history):
cleaned_chat_history = ""
for c in chat_history:
cleaned_chat_history += (f"{c['timestamp']} | {c['speaker']}: "
f"{c['text']}\n")
cleaned_chat_history += f"{c['timestamp']} | {c['speaker']}: " f"{c['text']}\n"
return cleaned_chat_history
def calculate_embedding(self, document):
response = self.client.embeddings.create(
model=os.getenv("EMBEDDING_MODEL"),
input=document
)
response = self.client.embeddings.create(model=os.getenv("EMBEDDING_MODEL"), input=document)
return response.data[0].embedding
def calculate_similarity(self, embedding1, embedding2):
return np.dot(embedding1, embedding2) / (
np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
return np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
def search(self, query, chunks, embeddings, k=1):
"""
@@ -101,10 +95,7 @@ class RAGManager:
"""
t1 = time.time()
query_embedding = self.calculate_embedding(query)
similarities = [
self.calculate_similarity(query_embedding, embedding)
for embedding in embeddings
]
similarities = [self.calculate_similarity(query_embedding, embedding) for embedding in embeddings]
# Get indices of top-k most similar chunks
if k == 1:
@@ -118,7 +109,7 @@ class RAGManager:
combined_chunks = "\n<->\n".join([chunks[i] for i in top_indices])
t2 = time.time()
return combined_chunks, t2-t1
return combined_chunks, t2 - t1
def create_chunks(self, chat_history, chunk_size=500):
"""
@@ -139,7 +130,7 @@ class RAGManager:
# Split into chunks based on token count
for i in range(0, len(tokens), chunk_size):
chunk_tokens = tokens[i:i+chunk_size]
chunk_tokens = tokens[i : i + chunk_size]
chunk = encoding.decode(chunk_tokens)
chunks.append(chunk)
@@ -159,13 +150,9 @@ class RAGManager:
chat_history = value["conversation"]
questions = value["question"]
chunks, embeddings = self.create_chunks(
chat_history, self.chunk_size
)
chunks, embeddings = self.create_chunks(chat_history, self.chunk_size)
for item in tqdm(
questions, desc="Answering questions", leave=False
):
for item in tqdm(questions, desc="Answering questions", leave=False):
question = item["question"]
answer = item.get("answer", "")
category = item["category"]
@@ -174,22 +161,20 @@ class RAGManager:
context = chunks[0]
search_time = 0
else:
context, search_time = self.search(
question, chunks, embeddings, k=self.k
)
response, response_time = self.generate_response(
question, context
)
context, search_time = self.search(question, chunks, embeddings, k=self.k)
response, response_time = self.generate_response(question, context)
FINAL_RESULTS[key].append({
"question": question,
"answer": answer,
"category": category,
"context": context,
"response": response,
"search_time": search_time,
"response_time": response_time,
})
FINAL_RESULTS[key].append(
{
"question": question,
"answer": answer,
"category": category,
"context": context,
"response": response,
"search_time": search_time,
"response_time": response_time,
}
)
with open(output_file_path, "w+") as f:
json.dump(FINAL_RESULTS, f, indent=4)


@@ -1,12 +1,3 @@
TECHNIQUES = [
"mem0",
"rag",
"langmem",
"zep",
"openai"
]
TECHNIQUES = ["mem0", "rag", "langmem", "zep", "openai"]
METHODS = [
"add",
"search"
]
METHODS = ["add", "search"]


@@ -19,12 +19,12 @@ class ZepAdd:
self.load_data()
def load_data(self):
with open(self.data_path, 'r') as f:
with open(self.data_path, "r") as f:
self.data = json.load(f)
return self.data
def process_conversation(self, run_id, item, idx):
conversation = item['conversation']
conversation = item["conversation"]
user_id = f"run_id_{run_id}_experiment_user_{idx}"
session_id = f"run_id_{run_id}_experiment_session_{idx}"
@@ -41,7 +41,7 @@ class ZepAdd:
print("Starting to add memories... for user", user_id)
for key in tqdm(conversation.keys(), desc=f"Processing user {user_id}"):
if key in ['speaker_a', 'speaker_b'] or "date" in key:
if key in ["speaker_a", "speaker_b"] or "date" in key:
continue
date_time_key = key + "_date_time"
@@ -51,11 +51,13 @@ class ZepAdd:
for chat in tqdm(chats, desc=f"Adding chats for {key}", leave=False):
self.zep_client.memory.add(
session_id=session_id,
messages=[Message(
role=chat['speaker'],
role_type="user",
content=f"{timestamp}: {chat['text']}",
)]
messages=[
Message(
role=chat["speaker"],
role_type="user",
content=f"{timestamp}: {chat['text']}",
)
],
)
def process_all_conversations(self, run_id):
@@ -71,4 +73,4 @@ if __name__ == "__main__":
parser.add_argument("--run_id", type=str, required=True)
args = parser.parse_args()
zep_add = ZepAdd(data_path="../../dataset/locomo10.json")
zep_add.process_all_conversations(args.run_id)
zep_add.process_all_conversations(args.run_id)


@@ -42,9 +42,9 @@ class ZepSearch:
return f"{edge.valid_at if edge.valid_at else 'date unknown'} - {(edge.invalid_at if edge.invalid_at else 'present')}"
def compose_search_context(self, edges: list[EntityEdge], nodes: list[EntityNode]) -> str:
facts = [f' - {edge.fact} ({self.format_edge_date_range(edge)})' for edge in edges]
entities = [f' - {node.name}: {node.summary}' for node in nodes]
return TEMPLATE.format(facts='\n'.join(facts), entities='\n'.join(entities))
facts = [f" - {edge.fact} ({self.format_edge_date_range(edge)})" for edge in edges]
entities = [f" - {node.name}: {node.summary}" for node in nodes]
return TEMPLATE.format(facts="\n".join(facts), entities="\n".join(entities))
def search_memory(self, run_id, idx, query, max_retries=3, retry_delay=1):
start_time = time.time()
@@ -52,8 +52,14 @@ class ZepSearch:
while retries < max_retries:
try:
user_id = f"run_id_{run_id}_experiment_user_{idx}"
edges_results = (self.zep_client.graph.search(user_id=user_id, reranker='cross_encoder', query=query, scope='edges', limit=20)).edges
node_results = (self.zep_client.graph.search(user_id=user_id, reranker='rrf', query=query, scope='nodes', limit=20)).nodes
edges_results = (
self.zep_client.graph.search(
user_id=user_id, reranker="cross_encoder", query=query, scope="edges", limit=20
)
).edges
node_results = (
self.zep_client.graph.search(user_id=user_id, reranker="rrf", query=query, scope="nodes", limit=20)
).nodes
context = self.compose_search_context(edges_results, node_results)
break
except Exception as e:
@@ -68,17 +74,13 @@ class ZepSearch:
return context, end_time - start_time
def process_question(self, run_id, val, idx):
question = val.get('question', '')
answer = val.get('answer', '')
category = val.get('category', -1)
evidence = val.get('evidence', [])
adversarial_answer = val.get('adversarial_answer', '')
question = val.get("question", "")
answer = val.get("answer", "")
category = val.get("category", -1)
evidence = val.get("evidence", [])
adversarial_answer = val.get("adversarial_answer", "")
response, search_memory_time, response_time, context = self.answer_question(
run_id,
idx,
question
)
response, search_memory_time, response_time, context = self.answer_question(run_id, idx, question)
result = {
"question": question,
@@ -89,7 +91,7 @@ class ZepSearch:
"adversarial_answer": adversarial_answer,
"search_memory_time": search_memory_time,
"response_time": response_time,
"context": context
"context": context,
}
return result
@@ -98,44 +100,35 @@ class ZepSearch:
context, search_memory_time = self.search_memory(run_id, idx, question)
template = Template(ANSWER_PROMPT_ZEP)
answer_prompt = template.render(
memories=context,
question=question
)
answer_prompt = template.render(memories=context, question=question)
t1 = time.time()
response = self.openai_client.chat.completions.create(
model=os.getenv("MODEL"),
messages=[
{"role": "system", "content": answer_prompt}
],
temperature=0.0
model=os.getenv("MODEL"), messages=[{"role": "system", "content": answer_prompt}], temperature=0.0
)
t2 = time.time()
response_time = t2 - t1
return response.choices[0].message.content, search_memory_time, response_time, context
def process_data_file(self, file_path, run_id, output_file_path):
with open(file_path, 'r') as f:
with open(file_path, "r") as f:
data = json.load(f)
for idx, item in tqdm(enumerate(data), total=len(data), desc="Processing conversations"):
qa = item['qa']
qa = item["qa"]
for question_item in tqdm(qa, total=len(qa), desc=f"Processing questions for conversation {idx}", leave=False):
result = self.process_question(
run_id,
question_item,
idx
)
for question_item in tqdm(
qa, total=len(qa), desc=f"Processing questions for conversation {idx}", leave=False
):
result = self.process_question(run_id, question_item, idx)
self.results[idx].append(result)
# Save results after each question is processed
with open(output_file_path, 'w') as f:
with open(output_file_path, "w") as f:
json.dump(self.results, f, indent=4)
# Final save at the end
with open(output_file_path, 'w') as f:
with open(output_file_path, "w") as f:
json.dump(self.results, f, indent=4)