{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6c26c848",
   "metadata": {},
   "source": [
    "# Let's go PRO!\n",
    "\n",
    "Advanced RAG Techniques!\n",
    "\n",
    "Let's start by digging into ingestion:\n",
    "\n",
    "1. No LangChain! Just native Python for maximum flexibility\n",
    "2. Let's use an LLM to divide documents into chunks in a sensible way\n",
    "3. Let's use the best chunk size and encoder from yesterday\n",
    "4. Let's also have the LLM rewrite chunks in a way that's most useful (\"document pre-processing\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e9f5f1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "from openai import OpenAI\n",
    "from dotenv import load_dotenv\n",
    "from pydantic import BaseModel, Field\n",
    "from chromadb import PersistentClient\n",
    "from tqdm import tqdm\n",
    "from litellm import completion\n",
    "import numpy as np\n",
    "from sklearn.manifold import TSNE\n",
    "import plotly.graph_objects as go\n",
    "\n",
    "load_dotenv(override=True)\n",
    "\n",
    "MODEL = \"gpt-4.1-nano\"\n",
    "\n",
    "DB_NAME = \"preprocessed_db\"\n",
    "collection_name = \"docs\"\n",
    "embedding_model = \"text-embedding-3-large\"\n",
    "KNOWLEDGE_BASE_PATH = Path(\"knowledge-base\")\n",
    "AVERAGE_CHUNK_SIZE = 500\n",
    "\n",
    "openai = OpenAI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7bfac66f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspired by LangChain's Document - let's have something similar\n",
    "\n",
    "class Result(BaseModel):\n",
    "    page_content: str\n",
    "    metadata: dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b9d0b58",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A class to perfectly represent a chunk\n",
    "\n",
    "class Chunk(BaseModel):\n",
    "    headline: str = Field(description=\"A brief heading for this chunk, typically a few words, that is most likely to be surfaced in a query\")\n",
    "    summary: str = Field(description=\"A few sentences summarizing the content of this chunk to answer common questions\")\n",
    "    original_text: str = Field(description=\"The original text of this chunk from the provided document, exactly as is, not changed in any way\")\n",
    "\n",
    "    def as_result(self, document):\n",
    "        metadata = {\"source\": document[\"source\"], \"type\": document[\"type\"]}\n",
    "        return Result(page_content=self.headline + \"\\n\\n\" + self.summary + \"\\n\\n\" + self.original_text, metadata=metadata)\n",
    "\n",
    "\n",
    "class Chunks(BaseModel):\n",
    "    chunks: list[Chunk]"
   ]
  },
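  {
   "cell_type": "markdown",
   "id": "b7a1e2c9",
   "metadata": {},
   "source": [
    "To make the pre-processed format concrete, here's a quick hand-built example of the `Chunk` -> `Result` flow. The values are made up for illustration; real chunks will come from the LLM in Step 2."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e4d9a3f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustrative values only - not from the knowledge base\n",
    "example = Chunk(\n",
    "    headline=\"Example headline\",\n",
    "    summary=\"A short summary of what this chunk covers.\",\n",
    "    original_text=\"The original text of the chunk, exactly as written.\",\n",
    ")\n",
    "example.as_result({\"type\": \"company\", \"source\": \"knowledge-base/company/about.md\"})"
   ]
  },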
  {
   "cell_type": "markdown",
   "id": "284b64c1",
   "metadata": {},
   "source": [
    "## Three steps:\n",
    "\n",
    "1. Fetch documents from the knowledge base, like LangChain did\n",
    "2. Call an LLM to turn documents into Chunks\n",
    "3. Store the Chunks in Chroma\n",
    "\n",
    "That's it!\n",
    "\n",
    "### Let's start with Step 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db5abdca",
   "metadata": {},
   "outputs": [],
   "source": [
    "def fetch_documents():\n",
    "    \"\"\"A homemade version of the LangChain DirectoryLoader\"\"\"\n",
    "\n",
    "    documents = []\n",
    "\n",
    "    # Each top-level folder in the knowledge base is a document type\n",
    "    for folder in KNOWLEDGE_BASE_PATH.iterdir():\n",
    "        doc_type = folder.name\n",
    "        for file in folder.rglob(\"*.md\"):\n",
    "            with open(file, \"r\", encoding=\"utf-8\") as f:\n",
    "                documents.append({\"type\": doc_type, \"source\": file.as_posix(), \"text\": f.read()})\n",
    "\n",
    "    print(f\"Loaded {len(documents)} documents\")\n",
    "    return documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5fe0a213",
   "metadata": {},
   "outputs": [],
   "source": [
    "documents = fetch_documents()"
   ]
  },
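  {
   "cell_type": "markdown",
   "id": "1f3c8d27",
   "metadata": {},
   "source": [
    "A quick sanity check on one loaded document, just to confirm the structure before we start chunking:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9c2e7b54",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Peek at the first document - type, source path, and the start of the text\n",
    "doc = documents[0]\n",
    "print(doc[\"type\"], \"|\", doc[\"source\"])\n",
    "print(doc[\"text\"][:200])"
   ]
  },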
  {
   "cell_type": "markdown",
   "id": "dffa1c68",
   "metadata": {},
   "source": [
    "### Donezo! On to Step 2 - make the chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "900e4170",
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_prompt(document):\n",
    "    how_many = (len(document[\"text\"]) // AVERAGE_CHUNK_SIZE) + 1\n",
    "    return f\"\"\"\n",
    "You take a document and you split the document into overlapping chunks for a KnowledgeBase.\n",
    "\n",
    "The document is from the shared drive of a company called Insurellm.\n",
    "The document is of type: {document[\"type\"]}\n",
    "The document has been retrieved from: {document[\"source\"]}\n",
    "\n",
    "A chatbot will use these chunks to answer questions about the company.\n",
    "You should divide up the document as you see fit, being sure that the entire document is returned in the chunks - don't leave anything out.\n",
    "This document should probably be split into {how_many} chunks, but you can have more or fewer as appropriate.\n",
    "There should be overlap between the chunks as appropriate; typically about 25% or roughly 50 words, so that the same text appears in multiple chunks for best retrieval results.\n",
    "\n",
    "For each chunk, you should provide a headline, a summary, and the original text of the chunk.\n",
    "Together your chunks should represent the entire document with overlap.\n",
    "\n",
    "Here is the document:\n",
    "\n",
    "{document[\"text\"]}\n",
    "\n",
    "Respond with the chunks.\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f38103b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(make_prompt(documents[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02f58850",
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_messages(document):\n",
    "    return [\n",
    "        {\"role\": \"user\", \"content\": make_prompt(document)},\n",
    "    ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ab04779",
   "metadata": {},
   "outputs": [],
   "source": [
    "make_messages(documents[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea20aba8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_document(document):\n",
    "    \"\"\"Ask the LLM to split one document into structured Chunks\"\"\"\n",
    "    messages = make_messages(document)\n",
    "    # litellm accepts a Pydantic model as response_format for structured output\n",
    "    response = completion(model=MODEL, messages=messages, response_format=Chunks)\n",
    "    reply = response.choices[0].message.content\n",
    "    doc_as_chunks = Chunks.model_validate_json(reply).chunks\n",
    "    return [chunk.as_result(document) for chunk in doc_as_chunks]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "480494d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "process_document(documents[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ccab1e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_chunks(documents):\n",
    "    chunks = []\n",
    "    for doc in tqdm(documents):\n",
    "        chunks.extend(process_document(doc))\n",
    "    return chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93115f0c",
   "metadata": {},
   "outputs": [],
   "source": [
    "chunks = create_chunks(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f51544f",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(len(chunks))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9750104c",
   "metadata": {},
   "source": [
    "### Well that was easy! If a bit slow.\n",
    "\n",
    "In the Python module version, I sneakily use a multiprocessing Pool to run this in parallel,\n",
    "but if you get a Rate Limit Error you can turn this off in the code.\n",
    "A rough sketch of the parallel idea follows below."
   ]
  },
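  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d8f0a62",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A minimal sketch of the parallel idea - NOT the actual module code.\n",
    "# Threads are used here because a process Pool can be awkward inside Jupyter,\n",
    "# and the work is network-bound anyway. Reduce max_workers if you hit rate limits.\n",
    "from concurrent.futures import ThreadPoolExecutor\n",
    "\n",
    "def create_chunks_parallel(documents, max_workers=4):\n",
    "    with ThreadPoolExecutor(max_workers=max_workers) as pool:\n",
    "        results = list(pool.map(process_document, documents))\n",
    "    return [chunk for doc_chunks in results for chunk in doc_chunks]\n",
    "\n",
    "# chunks = create_chunks_parallel(documents)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3a6b9c10",
   "metadata": {},
   "source": [
    "### Finally, Step 3 - save the embeddings"
   ]
  },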
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b19f36b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_embeddings(chunks):\n",
    "    chroma = PersistentClient(path=DB_NAME)\n",
    "    if collection_name in [c.name for c in chroma.list_collections()]:\n",
    "        chroma.delete_collection(collection_name)\n",
    "\n",
    "    # Embed the whole corpus in one API call - fine at this scale\n",
    "    texts = [chunk.page_content for chunk in chunks]\n",
    "    emb = openai.embeddings.create(model=embedding_model, input=texts).data\n",
    "    vectors = [e.embedding for e in emb]\n",
    "\n",
    "    collection = chroma.get_or_create_collection(collection_name)\n",
    "\n",
    "    ids = [str(i) for i in range(len(chunks))]\n",
    "    metas = [chunk.metadata for chunk in chunks]\n",
    "\n",
    "    collection.add(ids=ids, embeddings=vectors, documents=texts, metadatas=metas)\n",
    "    print(f\"Vectorstore created with {collection.count()} documents\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "34f52038",
   "metadata": {},
   "outputs": [],
   "source": [
    "create_embeddings(chunks)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3cf738d0",
   "metadata": {},
   "source": [
    "# Nothing more to do here... right?\n",
    "\n",
    "Wait! Didja think I'd forget??"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c318a46f",
   "metadata": {},
   "outputs": [],
   "source": [
    "chroma = PersistentClient(path=DB_NAME)\n",
    "collection = chroma.get_or_create_collection(collection_name)\n",
    "result = collection.get(include=['embeddings', 'documents', 'metadatas'])\n",
    "vectors = np.array(result['embeddings'])\n",
    "documents = result['documents']  # note: this reuses the name `documents` for the stored chunk texts\n",
    "metadatas = result['metadatas']\n",
    "doc_types = [metadata['type'] for metadata in metadatas]\n",
    "colors = [['blue', 'green', 'red', 'orange'][['products', 'employees', 'contracts', 'company'].index(t)] for t in doc_types]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4683c9a",
   "metadata": {},
   "outputs": [],
   "source": [
    "tsne = TSNE(n_components=2, random_state=42)\n",
    "reduced_vectors = tsne.fit_transform(vectors)\n",
    "\n",
    "# Create the 2D scatter plot\n",
    "fig = go.Figure(data=[go.Scatter(\n",
    "    x=reduced_vectors[:, 0],\n",
    "    y=reduced_vectors[:, 1],\n",
    "    mode='markers',\n",
    "    marker=dict(size=5, color=colors, opacity=0.8),\n",
    "    text=[f\"Type: {t}<br>Text: {d[:100]}...\" for t, d in zip(doc_types, documents)],\n",
    "    hoverinfo='text'\n",
    ")])\n",
    "\n",
    "fig.update_layout(title='2D Chroma Vector Store Visualization',\n",
    "                  xaxis_title='x',\n",
    "                  yaxis_title='y',\n",
    "                  width=800,\n",
    "                  height=600,\n",
    "                  margin=dict(r=20, b=10, l=10, t=40)\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2296866a",
   "metadata": {},
   "outputs": [],
   "source": [
    "tsne = TSNE(n_components=3, random_state=42)\n",
    "reduced_vectors = tsne.fit_transform(vectors)\n",
    "\n",
    "# Create the 3D scatter plot\n",
    "fig = go.Figure(data=[go.Scatter3d(\n",
    "    x=reduced_vectors[:, 0],\n",
    "    y=reduced_vectors[:, 1],\n",
    "    z=reduced_vectors[:, 2],\n",
    "    mode='markers',\n",
    "    marker=dict(size=5, color=colors, opacity=0.8),\n",
    "    text=[f\"Type: {t}<br>Text: {d[:100]}...\" for t, d in zip(doc_types, documents)],\n",
    "    hoverinfo='text'\n",
    ")])\n",
    "\n",
    "fig.update_layout(\n",
    "    title='3D Chroma Vector Store Visualization',\n",
    "    scene=dict(xaxis_title='x', yaxis_title='y', zaxis_title='z'),\n",
    "    width=900,\n",
    "    height=700,\n",
    "    margin=dict(r=10, b=10, l=10, t=40)\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ba72b54a",
   "metadata": {},
   "source": [
    "## And now - let's build an Advanced RAG!\n",
    "\n",
    "We will use these techniques:\n",
    "\n",
    "1. Reranking - reorder the retrieved results by relevance\n",
    "2. Query rewriting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0a3818c",
   "metadata": {},
   "outputs": [],
   "source": [
    "class RankOrder(BaseModel):\n",
    "    order: list[int] = Field(\n",
    "        description=\"The order of relevance of chunks, from most relevant to least relevant, by chunk id number\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e8446c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "def rerank(question, chunks):\n",
    "    system_prompt = \"\"\"\n",
    "You are a document re-ranker.\n",
    "You are provided with a question and a list of relevant chunks of text from a query of a knowledge base.\n",
    "The chunks are provided in the order they were retrieved; this should be approximately ordered by relevance, but you may be able to improve on that.\n",
    "You must rank order the provided chunks by relevance to the question, with the most relevant chunk first.\n",
    "Reply only with the list of ranked chunk ids, nothing else. Include all the chunk ids you are provided with, reranked.\n",
    "\"\"\"\n",
    "    user_prompt = f\"The user has asked the following question:\\n\\n{question}\\n\\nOrder all the chunks of text by relevance to the question, from most relevant to least relevant. Include all the chunk ids you are provided with, reranked.\\n\\n\"\n",
    "    user_prompt += \"Here are the chunks:\\n\\n\"\n",
    "    for index, chunk in enumerate(chunks):\n",
    "        user_prompt += f\"# CHUNK ID {index + 1}:\\n\\n{chunk.page_content}\\n\\n\"\n",
    "    user_prompt += \"Reply only with the list of ranked chunk ids, nothing else.\"\n",
    "    messages = [\n",
    "        {\"role\": \"system\", \"content\": system_prompt},\n",
    "        {\"role\": \"user\", \"content\": user_prompt},\n",
    "    ]\n",
    "    response = completion(model=MODEL, messages=messages, response_format=RankOrder)\n",
    "    reply = response.choices[0].message.content\n",
    "    order = RankOrder.model_validate_json(reply).order\n",
    "    print(order)\n",
    "    # Chunk ids are 1-based in the prompt, so shift back to list indices\n",
    "    return [chunks[i - 1] for i in order]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fa78048d",
   "metadata": {},
   "outputs": [],
   "source": [
    "RETRIEVAL_K = 10\n",
    "\n",
    "def fetch_context_unranked(question):\n",
    "    # Uses the `collection` handle opened in the visualization section above\n",
    "    query = openai.embeddings.create(model=embedding_model, input=[question]).data[0].embedding\n",
    "    results = collection.query(query_embeddings=[query], n_results=RETRIEVAL_K)\n",
    "    chunks = []\n",
    "    for result in zip(results[\"documents\"][0], results[\"metadatas\"][0]):\n",
    "        chunks.append(Result(page_content=result[0], metadata=result[1]))\n",
    "    return chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b53f6de",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af2ed5be",
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Who won the IIOTY award?\"\n",
    "chunks = fetch_context_unranked(question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0794453f",
   "metadata": {},
   "outputs": [],
   "source": [
    "for chunk in chunks:\n",
    "    print(chunk.page_content[:15] + \"...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7661e767",
   "metadata": {},
   "outputs": [],
   "source": [
    "reranked = rerank(question, chunks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a23594f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "for chunk in reranked:\n",
    "    print(chunk.page_content[:15] + \"...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "405de4d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Who went to Manchester University?\"\n",
    "RETRIEVAL_K = 20\n",
    "chunks = fetch_context_unranked(question)\n",
    "for index, c in enumerate(chunks):\n",
    "    if \"manchester\" in c.page_content.lower():\n",
    "        print(index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b9e343f",
   "metadata": {},
   "outputs": [],
   "source": [
    "reranked = rerank(question, chunks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "22948df7",
   "metadata": {},
   "outputs": [],
   "source": [
    "for index, c in enumerate(reranked):\n",
    "    if \"manchester\" in c.page_content.lower():\n",
    "        print(index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca1cae50",
   "metadata": {},
   "outputs": [],
   "source": [
    "reranked[0].page_content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "22ca740b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def fetch_context(question):\n",
    "    chunks = fetch_context_unranked(question)\n",
    "    return rerank(question, chunks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0547204c",
   "metadata": {},
   "outputs": [],
   "source": [
    "SYSTEM_PROMPT = \"\"\"\n",
    "You are a knowledgeable, friendly assistant representing the company Insurellm.\n",
    "You are chatting with a user about Insurellm.\n",
    "Your answer will be evaluated for accuracy, relevance and completeness, so make sure it only answers the question and fully answers it.\n",
    "If you don't know the answer, say so.\n",
    "For context, here are specific extracts from the Knowledge Base that might be directly relevant to the user's question:\n",
    "{context}\n",
    "\n",
    "With this context, please answer the user's question. Be accurate, relevant and complete.\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c1b5c57",
   "metadata": {},
   "outputs": [],
   "source": [
    "# In the context, include the source of the chunk\n",
    "\n",
    "def make_rag_messages(question, history, chunks):\n",
    "    context = \"\\n\\n\".join(f\"Extract from {chunk.metadata['source']}:\\n{chunk.page_content}\" for chunk in chunks)\n",
    "    system_prompt = SYSTEM_PROMPT.format(context=context)\n",
    "    return [{\"role\": \"system\", \"content\": system_prompt}] + history + [{\"role\": \"user\", \"content\": question}]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77d9491c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def rewrite_query(question, history=[]):\n",
    "    \"\"\"Rewrite the user's question to be a more specific question that is more likely to surface relevant content in the Knowledge Base.\"\"\"\n",
    "    message = f\"\"\"\n",
    "You are in a conversation with a user, answering questions about the company Insurellm.\n",
    "You are about to look up information in a Knowledge Base to answer the user's question.\n",
    "\n",
    "This is the history of your conversation so far with the user:\n",
    "{history}\n",
    "\n",
    "And this is the user's current question:\n",
    "{question}\n",
    "\n",
    "Respond only with a single, refined question that you will use to search the Knowledge Base.\n",
    "It should be a VERY short specific question most likely to surface content. Focus on the question details.\n",
    "Don't mention the company name unless it's a general question about the company.\n",
    "IMPORTANT: Respond ONLY with the knowledgebase query, nothing else.\n",
    "\"\"\"\n",
    "    response = completion(model=MODEL, messages=[{\"role\": \"system\", \"content\": message}])\n",
    "    return response.choices[0].message.content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8d050a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "rewrite_query(\"Who won the IIOTY award?\", [])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "909a1ecd",
   "metadata": {},
   "outputs": [],
   "source": [
    "def answer_question(question: str, history: list[dict] = []) -> tuple[str, list]:\n",
    "    \"\"\"\n",
    "    Answer a question using RAG and return the answer and the retrieved context\n",
    "    \"\"\"\n",
    "    query = rewrite_query(question, history)\n",
    "    print(query)\n",
    "    chunks = fetch_context(query)\n",
    "    messages = make_rag_messages(question, history, chunks)\n",
    "    response = completion(model=MODEL, messages=messages)\n",
    "    return response.choices[0].message.content, chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0ab3e5f",
   "metadata": {},
   "outputs": [],
   "source": [
    "answer_question(\"Who won the IIOTY award?\", [])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3237722e",
   "metadata": {},
   "outputs": [],
   "source": [
    "answer_question(\"Who went to Manchester University?\", [])"
   ]
  },
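  {
   "cell_type": "markdown",
   "id": "8e5d2c71",
   "metadata": {},
   "source": [
    "To tie it all together, here's a minimal sketch of a chat loop that threads conversation history through `answer_question`. It's illustrative only (the follow-up question is made up); a chat UI would wire up history the same way."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f4a1b08",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A minimal chat loop sketch - illustrative only.\n",
    "# History is a list of {\"role\", \"content\"} dicts, which is exactly\n",
    "# what rewrite_query and make_rag_messages expect.\n",
    "def chat(question, history):\n",
    "    answer, _context = answer_question(question, history)\n",
    "    history = history + [\n",
    "        {\"role\": \"user\", \"content\": question},\n",
    "        {\"role\": \"assistant\", \"content\": answer},\n",
    "    ]\n",
    "    return answer, history\n",
    "\n",
    "history = []\n",
    "answer, history = chat(\"Who won the IIOTY award?\", history)\n",
    "print(answer)\n",
    "answer, history = chat(\"In which year was that?\", history)\n",
    "print(answer)"
   ]
  },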
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64c93b30",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}