adjust file, there is a Notebook formatting error

This commit is contained in:
misi
2025-09-30 22:27:23 +13:00
parent 88fa795c5f
commit 2cceb753fd

View File

@@ -1 +1,120 @@
from datasets import load_dataset
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "25868a51",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"import gradio as gr"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "805b6abc",
"metadata": {},
"outputs": [],
"source": [
"# Constants first, then load .env (override=True) and create the OpenAI client\n",
"MODEL = 'gpt-4o-mini'\n",
"db_name = \"vector_db\"\n",
"\n",
"load_dotenv(override=True)\n",
"# api_key is read here but not passed to the client below\n",
"api_key = os.environ.get('OPENAI_API_KEY')\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "382f71f2",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import WebBaseLoader\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_openai import OpenAIEmbeddings, ChatOpenAI\n",
"from langchain_chroma import Chroma\n",
"from langchain.memory import ConversationBufferMemory\n",
"from langchain.chains import ConversationalRetrievalChain"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e41daa2f",
"metadata": {},
"outputs": [],
"source": [
"# Load the source pages and split them into overlapping chunks\n",
"source_urls = [\n",
"    \"https://en.wikipedia.org/wiki/Solar_System\",\n",
"    \"https://en.wikipedia.org/wiki/Star_Wars_(film)\",\n",
"]\n",
"loader = WebBaseLoader(source_urls)\n",
"docs = loader.load()\n",
"\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
"chunks = text_splitter.split_documents(docs)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a21e1776",
"metadata": {},
"outputs": [],
"source": [
"embeddings = OpenAIEmbeddings()\n",
"\n",
"# If the persist directory already exists, delete its collection before rebuilding\n",
"if os.path.exists(db_name):\n",
"    Chroma(\n",
"        persist_directory=db_name,\n",
"        embedding_function=embeddings,\n",
"    ).delete_collection()\n",
"\n",
"vectorstore = Chroma.from_documents(\n",
"    documents=chunks,\n",
"    embedding=embeddings,\n",
"    persist_directory=db_name,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb072ee8",
"metadata": {},
"outputs": [],
"source": [
"# Build the conversational retrieval chain from a retriever, a chat-history memory and the chat model\n",
"retriever = vectorstore.as_retriever()\n",
"memory = ConversationBufferMemory(return_messages=True, memory_key='chat_history')\n",
"llm = ChatOpenAI(model_name=MODEL, temperature=0.7)\n",
"conversation_chain = ConversationalRetrievalChain.from_llm(\n",
"    llm=llm,\n",
"    retriever=retriever,\n",
"    memory=memory,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0531ff73",
"metadata": {},
"outputs": [],
"source": [
"def chat(question, history):\n",
"    \"\"\"Return the chain's answer to `question`.\n",
"\n",
"    `history` is part of the callback signature but is not read here.\n",
"    \"\"\"\n",
"    return conversation_chain.invoke({\"question\": question})[\"answer\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a87fe151",
"metadata": {},
"outputs": [],
"source": [
"# Build the chat UI around `chat`, then launch it (inbrowser=True)\n",
"chat_ui = gr.ChatInterface(chat, type=\"messages\")\n",
"view = chat_ui.launch(inbrowser=True)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}