diff --git a/week5/community-contributions/w5_excercise.ipynb b/week5/community-contributions/w5_excercise.ipynb
index d30c036..85430e3 100644
--- a/week5/community-contributions/w5_excercise.ipynb
+++ b/week5/community-contributions/w5_excercise.ipynb
@@ -1 +1,120 @@
-from datasets import load_dataset
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "25868a51",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "from openai import OpenAI\n",
+    "import gradio as gr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "805b6abc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize OpenAI and constants\n",
+    "load_dotenv(override=True)\n",
+    "api_key = os.getenv('OPENAI_API_KEY')\n",
+    "MODEL = 'gpt-4o-mini'\n",
+    "openai = OpenAI()\n",
+    "\n",
+    "db_name = \"vector_db\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "382f71f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.document_loaders import WebBaseLoader\n",
+    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain_openai import OpenAIEmbeddings, ChatOpenAI\n",
+    "from langchain_chroma import Chroma\n",
+    "from langchain.memory import ConversationBufferMemory\n",
+    "from langchain.chains import ConversationalRetrievalChain"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e41daa2f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loader = WebBaseLoader([\"https://en.wikipedia.org/wiki/Solar_System\",\n",
+    "                        \"https://en.wikipedia.org/wiki/Star_Wars_(film)\"])\n",
+    "docs = loader.load()\n",
+    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
+    "chunks = text_splitter.split_documents(docs)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a21e1776",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embeddings = OpenAIEmbeddings()\n",
+    "\n",
+    "if os.path.exists(db_name):\n",
+    "    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()\n",
+    "\n",
+    "vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fb072ee8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a new Chat with OpenAI\n",
+    "llm = ChatOpenAI(temperature=0.7, model_name=MODEL)\n",
+    "memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)\n",
+    "retriever = vectorstore.as_retriever()\n",
+    "conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0531ff73",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def chat(question, history):\n",
+    "    result = conversation_chain.invoke({\"question\": question})\n",
+    "    return result[\"answer\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a87fe151",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "view = gr.ChatInterface(chat, type=\"messages\").launch(inbrowser=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}