Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,6 @@ __pycache__/

# ignore adalflow cache
/adalflow

# Ignore RAG session logs written by src/rag.py
rag_logs.md
206 changes: 206 additions & 0 deletions rag_logs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@

### 2025-02-07 12:31:49
🔍 No conversation history, not a clarification.

### 2025-02-07 12:31:50

#### Context Usage
- 🔎 Retrieved new documents for query
- 📚 Number of new documents: 20
- 📄 Document paths:
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/evaluation.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/evaluation.rst
- tutorials/rag/rag.py
- adalflow/adalflow/core/retriever.py
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/index.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/db.rst


### 2025-02-07 12:32:21

#### Query Analysis
- 🤔 Query: 'can you give me example usage?'
- 📝 Result: This is a clarification


### 2025-02-07 12:32:21

#### Context Usage
- ♻️ Reusing previous context for clarification query
- 📚 Number of reused documents: 20
- 📄 Document paths:
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/evaluation.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/evaluation.rst
- tutorials/rag/rag.py
- adalflow/adalflow/core/retriever.py
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/index.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/db.rst


### 2025-02-07 12:33:21

#### Query Analysis
- 🤔 Query: 'can you use groq client instead of openai for the generation'
- 📝 Result: This is a clarification


### 2025-02-07 12:33:21

#### Context Usage
- ♻️ Reusing previous context for clarification query
- 📚 Number of reused documents: 20
- 📄 Document paths:
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/evaluation.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/evaluation.rst
- tutorials/rag/rag.py
- adalflow/adalflow/core/retriever.py
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/index.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/db.rst


### 2025-02-07 12:33:55

#### Query Analysis
- 🤔 Query: 'provide me with the whole example'
- 📝 Result: This is a clarification


### 2025-02-07 12:33:55

#### Context Usage
- ♻️ Reusing previous context for clarification query
- 📚 Number of reused documents: 20
- 📄 Document paths:
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/evaluation.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/evaluation.rst
- tutorials/rag/rag.py
- adalflow/adalflow/core/retriever.py
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/index.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/db.rst


### 2025-02-07 12:34:35

#### Query Analysis
- 🤔 Query: 'provide me with the whole example but use the groq client'
- 📝 Result: This is a clarification


### 2025-02-07 12:34:35

#### Context Usage
- ♻️ Reusing previous context for clarification query
- 📚 Number of reused documents: 20
- 📄 Document paths:
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/evaluation.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/evaluation.rst
- tutorials/rag/rag.py
- adalflow/adalflow/core/retriever.py
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/index.rst
- docs/source/tutorials/retriever.rst
- docs/source/tutorials/rag_playbook.rst
- docs/source/tutorials/db.rst


### 2025-02-07 12:35:16

#### Query Analysis
- 🤔 Query: 'can you explain me about agent compoenent.'
- 📝 Result: This is a new question


### 2025-02-07 12:35:17

#### Context Usage
- 🔎 Retrieved new documents for query
- 📚 Number of new documents: 20
- 📄 Document paths:
- adalflow/adalflow/components/agent/README.md
- docs/source/tutorials/agent.rst
- docs/source/tutorials/component.rst
- docs/source/tutorials/index.rst
- docs/source/tutorials/index.rst
- docs/source/tutorials/agent.rst
- docs/source/apis/components/index.rst
- docs/source/tutorials/index.rst
- adalflow/tests/test_react_agent.py
- docs/source/tutorials/component.rst
- use_cases/unsorted/simple_qa_groq.py
- docs/source/apis/core/index.rst
- docs/source/tutorials/auto_text_grad.rst
- adalflow/tests/test_component.py
- docs/source/tutorials/adalcomponent.rst
- use_cases/unsorted/simple_qa_memory.py
- docs/source/tutorials/auto_text_grad.rst
- docs/source/tutorials/index.rst
- use_cases/unsorted/simple_qa_trainable.py
- docs/source/tutorials/component.rst

125 changes: 107 additions & 18 deletions src/rag.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Any, List
from typing import Any, List, Tuple, Optional
from uuid import uuid4
import os
from datetime import datetime

import adalflow as adal
from adalflow.core.types import (
Expand All @@ -16,6 +18,7 @@
from config import configs
from src.data_pipeline import DatabaseManager
from adalflow.utils import printc
from dataclasses import dataclass, field


class Memory(DataComponent):
Expand All @@ -41,6 +44,14 @@ def add_dialog_turn(self, user_query: str, assistant_response: str):
self.current_conversation.append_dialog_turn(dialog_turn)


# Structured output of the generator: the model's reasoning plus the final
# answer. `#` comments are used instead of a class docstring on purpose --
# NOTE(review): a docstring may be picked up by the adalflow schema/prompt
# generation; confirm before adding one.
@dataclass
class RAGAnswer(adal.DataClass):
    rationale: str = field(
        default="",
        metadata={"desc": "Rationale for the answer."},
    )
    answer: str = field(
        default="",
        metadata={"desc": "Answer to the user query."},
    )

    # Presumably read by adal.DataClass to select and order the fields that
    # appear in the output format description -- TODO confirm.
    __output_fields__ = ["rationale", "answer"]


system_prompt = r"""
You are a code assistant which answer's user question on a Github Repo.
You will receive user query, relevant context, and past conversation history.
Expand Down Expand Up @@ -75,16 +86,6 @@ def add_dialog_turn(self, user_query: str, assistant_response: str):
<END_OF_USER_PROMPT>
"""

from dataclasses import dataclass, field


@dataclass
class RAGAnswer(adal.DataClass):
rationale: str = field(default="", metadata={"desc": "Rationale for the answer."})
answer: str = field(default="", metadata={"desc": "Answer to the user query."})

__output_fields__ = ["rationale", "answer"]


class RAG(adal.Component):
__doc__ = """RAG with one repo.
Expand Down Expand Up @@ -119,6 +120,13 @@ def __init__(self):
model_kwargs=configs["generator"]["model_kwargs"],
output_processors=data_parser,
)
self.previous_retrieved_documents = None

# Initialize log file
self.log_file = "rag_logs.md"
if os.path.exists(self.log_file):
# Clear previous logs when starting new session
open(self.log_file, 'w').close()

def initialize_db_manager(self):
self.db_manager = DatabaseManager()
Expand All @@ -136,15 +144,89 @@ def prepare_retriever(self, repo_url_or_path: str):
document_map_func=lambda doc: doc.vector,
)

def call(self, query: str) -> Any:
def log_to_file(self, message: str) -> None:
    """Append ``message`` to ``self.log_file`` under a timestamp heading.

    Each entry is written as a blank line, a ``### YYYY-MM-DD HH:MM:SS``
    heading (local time), the message, and a trailing newline.

    Args:
        message: Text (typically markdown) to append to the log.

    Raises:
        OSError: If the log file cannot be opened or written.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Log messages contain emoji; pin UTF-8 so the write does not depend on
    # the platform's locale encoding (which may be unable to encode them).
    with open(self.log_file, "a", encoding="utf-8") as f:
        f.write(f"\n### {timestamp}\n{message}\n")

def is_clarification_query(self, query: str) -> bool:
    """Decide whether ``query`` is a follow-up to the conversation so far.

    With no conversation history the result is always ``False``. Otherwise
    the generator is asked to classify the query, and the ``answer`` field
    of its parsed output is scanned case-insensitively for "true". The
    outcome is recorded through ``self.log_to_file``.

    Args:
        query: The user's latest message.

    Returns:
        ``True`` if the model judged the query to be a clarification or
        follow-up; ``False`` otherwise, including when the generator yields
        no parsed data, so that a failed classification is treated as a new
        question rather than raising.
    """
    # Read the history once and reuse it for the guard, the prompt text and
    # the prompt kwargs.
    history = self.memory()
    if not history:
        self.log_to_file("🔍 No conversation history, not a clarification.")
        return False

    clarification_prompt = f"""
You are a clarification detector. Analyze if the query is a follow-up or clarification of the previous conversation.
Your response should include:
- A rationale explaining your reasoning
- A clear True/False answer

Output your response in this format:
{{
"rationale": "Your step-by-step reasoning here",
"answer": "True or False"
}}

Conversation History:
{history}

Query:
{query}
"""
    response = self.generator(
        prompt_kwargs={
            "conversation_history": history,
            "system_prompt": clarification_prompt,
        },
    )

    # The parsed output may be missing (e.g. the model reply could not be
    # parsed) or the answer may not be a string; neither should crash the
    # query path, so anything unusable counts as "not a clarification".
    parsed = getattr(response, "data", None)
    answer = getattr(parsed, "answer", None)
    is_clarification = answer is not None and "true" in str(answer).lower()

    verdict = (
        "This is a clarification" if is_clarification else "This is a new question"
    )
    log_message = f"""
#### Query Analysis
- 🤔 Query: '{query}'
- 📝 Result: {verdict}
"""
    self.log_to_file(log_message)
    return is_clarification

def call(self, query: str) -> Tuple[Any, Any]:
previous_context = (
self.previous_retrieved_documents[0].documents
if self.previous_retrieved_documents
else None
)

retrieved_documents = self.retriever(query)
is_clarification = self.is_clarification_query(query)

# fill in the document
retrieved_documents[0].documents = [
self.transformed_docs[doc_index]
for doc_index in retrieved_documents[0].doc_indices
]
if is_clarification and self.previous_retrieved_documents:
retrieved_documents = self.previous_retrieved_documents
log_message = f"""
#### Context Usage
- ♻️ Reusing previous context for clarification query
- 📚 Number of reused documents: {len(retrieved_documents[0].documents)}
- 📄 Document paths:
{self._format_doc_paths(retrieved_documents[0].documents)}
"""
self.log_to_file(log_message)
else:
retrieved_documents = self.retriever(query)
retrieved_documents[0].documents = [
self.transformed_docs[doc_index]
for doc_index in retrieved_documents[0].doc_indices
]
self.previous_retrieved_documents = retrieved_documents

log_message = f"""
#### Context Usage
- 🔎 Retrieved new documents for query
- 📚 Number of new documents: {len(retrieved_documents[0].documents)}
- 📄 Document paths:
{self._format_doc_paths(retrieved_documents[0].documents)}
"""
self.log_to_file(log_message)

printc(f"retrieved_documents: {retrieved_documents[0].documents}")
printc(f"memory: {self.memory()}")
Expand All @@ -168,6 +250,13 @@ def call(self, query: str) -> Any:

return final_response, retrieved_documents

def _format_doc_paths(self, documents: List[Any]) -> str:
"""Helper to format document paths for logging"""
return "\n ".join(
f"- {doc.meta_data.get('file_path', 'unknown')}"
for doc in documents
)


if __name__ == "__main__":
from adalflow.utils import get_logger
Expand Down