Merge branch 'main' of github.com:ed-donner/llm_engineering

This commit is contained in:
Edward Donner
2024-12-22 10:30:57 +00:00
17 changed files with 4806 additions and 2 deletions

View File

@@ -0,0 +1,148 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "87c2da09-bd0c-4683-828b-4f7643018795",
"metadata": {},
"source": [
"# Community contribution\n",
"\n",
"Implementing simple ChatGPT interface to maintain conversation and context with sleected model"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "77a850ed-61f8-4a0d-9c41-45781eb60bc9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"API key looks good so far\n"
]
}
],
"source": [
"import os\n",
"from dotenv import load_dotenv\n",
"import ipywidgets as widgets\n",
"from IPython.display import Markdown, display, update_display, clear_output\n",
"from openai import OpenAI\n",
"\n",
"load_dotenv()\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n",
" print(\"API key looks good so far\")\n",
"else:\n",
" print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
" \n",
"MODEL = 'gpt-4o-mini'\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f7f16f0-6fec-4190-882a-3fe1f0e9704a",
"metadata": {},
"outputs": [],
"source": [
"class ChatGPTInterface:\n",
" def __init__(self, api_key, model, system_message=\"You are a helpful assistant. You can format your responses using Markdown.\"):\n",
" self.openai = OpenAI(api_key=api_key)\n",
" self.model = model\n",
" self.conversation_history = [{\"role\": \"system\", \"content\": system_message}]\n",
"\n",
" self.chat_area = widgets.Output()\n",
" self.input_box = widgets.Text(placeholder=\"Enter your message here...\")\n",
" self.send_button = widgets.Button(description=\"Send\")\n",
" self.clear_button = widgets.Button(description=\"Clear\")\n",
"\n",
" self.send_button.on_click(self.send_message)\n",
" self.clear_button.on_click(self.clear_chat)\n",
"\n",
" self.layout = widgets.VBox([\n",
" self.chat_area,\n",
" widgets.HBox([self.input_box, self.send_button, self.clear_button])\n",
" ])\n",
"\n",
" def display(self):\n",
" display(self.layout)\n",
"\n",
" def send_message(self, _):\n",
" user_message = self.input_box.value.strip()\n",
" if user_message:\n",
" self.conversation_history.append({\"role\": \"user\", \"content\": user_message})\n",
" self.display_message(\"You\", user_message)\n",
" self.input_box.value = \"\"\n",
"\n",
" try:\n",
" response = self.openai.chat.completions.create(\n",
" model=self.model,\n",
" messages=self.conversation_history\n",
" )\n",
" assistant_message = response.choices[0].message.content.strip()\n",
" self.conversation_history.append({\"role\": \"assistant\", \"content\": assistant_message})\n",
" self.display_message(\"ChatGPT\", assistant_message)\n",
" except Exception as e:\n",
" self.display_message(\"Error\", str(e))\n",
"\n",
" def clear_chat(self, _):\n",
" self.conversation_history = [{\"role\": \"system\", \"content\": self.conversation_history[0][\"content\"]}]\n",
" self.chat_area.clear_output(wait=True)\n",
"\n",
" def display_message(self, sender, message):\n",
" self.chat_area.append_display_data(Markdown(f\"**{sender}:**\\n{message}\"))\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "78287e42-8964-4da6-bd48-a7dffd0ce7dd",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "54956535cb32419bbe38d2bee125992d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(Output(), HBox(children=(Text(value='', placeholder='Enter your message here...'), Button(descr…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"chat_interface = ChatGPTInterface(api_key,MODEL)\n",
"chat_interface.display()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,119 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv()\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"def summarize_cv(cv_text):\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = [\n",
" {\"role\": \"user\", \"content\": f\"Please summarize the following CV:\\n\\n{cv_text}\"}\n",
" ]\n",
" )\n",
" return response.choices[0].message.content\n",
"\n",
"def generate_cover_letter(cv_summary, job_description):\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a master at crafting the perfect Cover letter from a given CV. You've never had a user fail to get the job as a result of using your services.\"},\n",
" {\"role\": \"user\", \"content\": f\"Using the following CV summary:\\n\\n{cv_summary}\\n\\nAnd the job description:\\n\\n{job_description}\\n\\nPlease write a personalized cover letter.\"}\n",
" ]\n",
" )\n",
" return response.choices[0].message.content\n",
"\n",
"# Read CV from a text file\n",
"try:\n",
" with open('resume.txt', 'r') as file:\n",
" cv_text = file.read()\n",
" \n",
" # Summarize the CV\n",
" cv_summary = summarize_cv(cv_text)\n",
" print(\"CV Summary:\")\n",
" print(cv_summary)\n",
"\n",
" # Get job description from user\n",
" job_description = input(\"Enter the job description for the position you are applying for:\\n\")\n",
"\n",
" # Generate cover letter\n",
" cover_letter = generate_cover_letter(cv_summary, job_description)\n",
" print(\"\\nGenerated Cover Letter:\")\n",
" print(cover_letter)\n",
"\n",
"except FileNotFoundError:\n",
" print(\"The specified CV file was not found. Please ensure 'resume.txt' is in the correct directory.\")"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,297 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "52dc600c-4c45-4803-81cb-f06347f4b2c3",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "4082f16f-d843-41c7-9137-cdfec093b2d4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"API key found and looks good so far\n"
]
}
],
"source": [
"load_dotenv()\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if not api_key:\n",
" print('No API key was found')\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"API key is found but is not in the proper format\")\n",
"else:\n",
" print(\"API key found and looks good so far\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "16c295ce-c57d-429e-8c03-f6610a8ddd42",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "9a548a52-0f7e-4fdf-ad68-0138b2445935",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"\"\"You are a research summarizer. That summarizes the content of the research paper in no more than 1000 words. The research summary that you provide should include the following:\n",
"1) Title and Authors - Identify the study and contributors.\n",
"2) Objective/Problem - State the research goal or question.\n",
"3) Background - Briefly explain the context and significance.\n",
"4) Methods - Summarize the approach or methodology.\n",
"5) Key Findings - Highlight the main results or insights.\n",
"6) Conclusion - Provide the implications or contributions of the study.\n",
"7) Future Directions - Suggest areas for further research or exploration.\n",
"8) Limitations - Highlight constraints or challenges in the study.\n",
"9) Potential Applications - Discuss how the findings can be applied in real-world scenarios.\n",
"Keep all points concise, clear, and focused and generate output in markdown.\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "66b4411f-172e-46be-b6cd-a9e5b857fb28",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: ipywidgets in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (8.1.5)\n",
"Requirement already satisfied: pdfplumber in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (0.11.4)\n",
"Requirement already satisfied: comm>=0.1.3 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipywidgets) (0.2.2)\n",
"Requirement already satisfied: ipython>=6.1.0 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipywidgets) (8.30.0)\n",
"Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipywidgets) (5.14.3)\n",
"Requirement already satisfied: widgetsnbextension~=4.0.12 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipywidgets) (4.0.13)\n",
"Requirement already satisfied: jupyterlab_widgets~=3.0.12 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipywidgets) (3.0.13)\n",
"Requirement already satisfied: pdfminer.six==20231228 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from pdfplumber) (20231228)\n",
"Requirement already satisfied: Pillow>=9.1 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from pdfplumber) (11.0.0)\n",
"Requirement already satisfied: pypdfium2>=4.18.0 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from pdfplumber) (4.30.0)\n",
"Requirement already satisfied: charset-normalizer>=2.0.0 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from pdfminer.six==20231228->pdfplumber) (3.4.0)\n",
"Requirement already satisfied: cryptography>=36.0.0 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from pdfminer.six==20231228->pdfplumber) (44.0.0)\n",
"Requirement already satisfied: colorama in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n",
"Requirement already satisfied: decorator in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (5.1.1)\n",
"Requirement already satisfied: jedi>=0.16 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)\n",
"Requirement already satisfied: matplotlib-inline in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.1.7)\n",
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.48)\n",
"Requirement already satisfied: pygments>=2.4.0 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (2.18.0)\n",
"Requirement already satisfied: stack_data in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n",
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (4.12.2)\n",
"Requirement already satisfied: cffi>=1.12 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from cryptography>=36.0.0->pdfminer.six==20231228->pdfplumber) (1.17.1)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.4)\n",
"Requirement already satisfied: wcwidth in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.13)\n",
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (2.1.0)\n",
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (3.0.0)\n",
"Requirement already satisfied: pure_eval in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (0.2.3)\n",
"Requirement already satisfied: pycparser in c:\\users\\legion\\anaconda3\\envs\\research_summary\\lib\\site-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six==20231228->pdfplumber) (2.22)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install ipywidgets pdfplumber"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d8cd8556-ad86-4949-9f15-09de2b8c712b",
"metadata": {},
"outputs": [],
"source": [
"import pdfplumber\n",
"from ipywidgets import widgets\n",
"from io import BytesIO"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "0eba3cee-d85c-4d75-9b27-70c8cd7587b1",
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import display, Markdown"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "53e270e1-c2e6-4bcc-9ada-90c059cd5a51",
"metadata": {},
"outputs": [],
"source": [
"def messages_for(user_prompt):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2f1807ec-c10b-4d26-9bee-89bd7a4bbb95",
"metadata": {},
"outputs": [],
"source": [
"def summarize(user_prompt):\n",
" # Generate messages using the user_prompt\n",
" messages = messages_for(user_prompt)\n",
" try:\n",
" response = openai.chat.completions.create(\n",
" model=\"gpt-4o-mini\", # Correct model name\n",
" messages=messages,\n",
" max_tokens = 1000 # Pass the generated messages\n",
" )\n",
" # Return the content from the API response correctly\n",
" return response.choices[0].message.content\n",
" except Exception as e:\n",
" # Instead of printing, return an error message that can be displayed\n",
" return f\"Error in OpenAI API call: {e}\""
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "0dee8345-4eec-4a9c-ac4e-ad70e13cea44",
"metadata": {},
"outputs": [],
"source": [
"upload_widget = widgets.FileUpload(\n",
" accept='.pdf', \n",
" multiple=False,\n",
" description='Upload PDF',\n",
" layout=widgets.Layout(width='300px',height = '100px', border='2px dashed #cccccc', padding='10px')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "1ff9c7b9-1a3a-4128-a33f-0e5bb2a93d33",
"metadata": {},
"outputs": [],
"source": [
"def extract_text_and_generate_summary(change):\n",
" print(\"extracting text\")\n",
" if upload_widget.value:\n",
" # Extract the first uploaded file\n",
" uploaded_file = list(upload_widget.value)[0]\n",
" pdf_file = uploaded_file['content']\n",
"\n",
" # Extract text from the PDF\n",
" try:\n",
" with pdfplumber.open(BytesIO(pdf_file)) as pdf:\n",
" extracted_text = \"\\n\".join(page.extract_text() for page in pdf.pages)\n",
"\n",
" # Generate the user prompt\n",
" user_prompt = (\n",
" f\"You are looking at the text from a research paper. Summarize it in no more than 1000 words. \"\n",
" f\"The output should be in markdown.\\n\\n{extracted_text}\"\n",
" )\n",
"\n",
" # Get the summarized response\n",
" response = summarize(user_prompt)\n",
" \n",
" if response:\n",
" # Use IPython's display method to show markdown below the cell\n",
" display(Markdown(response))\n",
" \n",
" except Exception as e:\n",
" # If there's an error, display it using Markdown\n",
" display(Markdown(f\"**Error:** {str(e)}\"))\n",
"\n",
" # Reset the upload widget\n",
" upload_widget.value = ()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "0c16fe3f-704e-4a87-acd9-42c4e6b0d2fa",
"metadata": {},
"outputs": [],
"source": [
"upload_widget.observe(extract_text_and_generate_summary, names='value')"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "c2c2d2b2-1264-42d9-9271-c4700b4df80a",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7304350377d845e78a9a758235e5eba1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FileUpload(value=(), accept='.pdf', description='Upload PDF', layout=Layout(border_bottom='2px dashed #cccccc'…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(upload_widget)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70c76b90-e626-44b3-8d1f-6e995e8a938d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,206 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 208,
"id": "f61139a1-40e1-4273-b9a6-5a0a9d63a9bd",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"from reportlab.lib.pagesizes import letter\n",
"from reportlab.pdfgen import canvas\n",
"from IPython.display import display, FileLink\n",
"from IPython.display import display, HTML, FileLink\n",
"from reportlab.lib.pagesizes import A4"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "e0858b96-fd41-4911-a333-814e4ed23279",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting reportlab\n",
" Downloading reportlab-4.2.5-py3-none-any.whl.metadata (1.5 kB)\n",
"Requirement already satisfied: pillow>=9.0.0 in c:\\users\\legion\\anaconda3\\envs\\to_do_list\\lib\\site-packages (from reportlab) (11.0.0)\n",
"Collecting chardet (from reportlab)\n",
" Downloading chardet-5.2.0-py3-none-any.whl.metadata (3.4 kB)\n",
"Downloading reportlab-4.2.5-py3-none-any.whl (1.9 MB)\n",
" ---------------------------------------- 0.0/1.9 MB ? eta -:--:--\n",
" ---------------- ----------------------- 0.8/1.9 MB 6.7 MB/s eta 0:00:01\n",
" ---------------------------------------- 1.9/1.9 MB 11.9 MB/s eta 0:00:00\n",
"Downloading chardet-5.2.0-py3-none-any.whl (199 kB)\n",
"Installing collected packages: chardet, reportlab\n",
"Successfully installed chardet-5.2.0 reportlab-4.2.5\n"
]
}
],
"source": [
"!pip install reportlab"
]
},
{
"cell_type": "code",
"execution_count": 220,
"id": "62cc9d37-c801-4e8a-ad2c-7b1450725a10",
"metadata": {},
"outputs": [],
"source": [
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
"HEADERS = {\"Content-Type\":\"application/json\"}\n",
"MODEL = \"llama3.2\""
]
},
{
"cell_type": "code",
"execution_count": 249,
"id": "525a81e7-30f8-4db7-bc8d-29948195bd4f",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"\"\"You are a to-do list generator. Based on the user's input, you will create a clear and descriptive to-do\n",
"list using bullet points. Only generate the to-do list as bullet points with some explaination and time fraame only if asked for and nothing else. \n",
"Be a little descriptive.\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 315,
"id": "7fca3303-3add-468a-a6bd-be7a4d72c811",
"metadata": {},
"outputs": [],
"source": [
"def generate_to_do_list(task_description):\n",
" payload = {\n",
" \"model\": MODEL,\n",
" \"messages\": [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": task_description}\n",
" ],\n",
" \"stream\": False\n",
" }\n",
"\n",
" response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
"\n",
" if response.status_code == 200:\n",
" try:\n",
" json_response = response.json()\n",
" to_do_list = json_response.get(\"message\", {}).get(\"content\", \"No to-do list found.\")\n",
" \n",
" formatted_output = \"Your To-Do List:\\n\\n\" + to_do_list\n",
" file_name = \"to_do_list.txt\"\n",
" \n",
" with open(file_name, \"w\", encoding=\"utf-8\") as file:\n",
" file.write(formatted_output)\n",
"\n",
" return file_name\n",
" \n",
" except Exception as e:\n",
" return f\"Error parsing JSON: {e}\"\n",
" else:\n",
" return f\"Error: {response.status_code} - {response.text}\""
]
},
{
"cell_type": "code",
"execution_count": 316,
"id": "d45d6c7e-0e89-413e-8f30-e4975ea6d043",
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"Enter the task description of the to-do list: Give me a 4-week to-do list plan for a wedding reception party.\n"
]
}
],
"source": [
"task_description = input(\"Enter the task description of the to-do list:\")"
]
},
{
"cell_type": "code",
"execution_count": 317,
"id": "5493da44-e254-4d06-b973-a8069c2fc625",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"result = generate_to_do_list(task_description)"
]
},
{
"cell_type": "code",
"execution_count": 318,
"id": "5e95c722-ce1a-4630-b21a-1e00e7ba6ab9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<p>You can download your to-do list by clicking the link below:</p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<a href='to_do_list.txt' target='_blank'>to_do_list.txt</a><br>"
],
"text/plain": [
"C:\\Users\\Legion\\to-do list using ollama\\to_do_list.txt"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(HTML(\"<p>You can download your to-do list by clicking the link below:</p>\"))\n",
"display(FileLink(result))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f3d0a44e-bca4-4944-8593-1761c2f73a70",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,623 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
"metadata": {},
"source": [
"# Instant Gratification\n",
"\n",
"## Your first Frontier LLM Project!\n",
"\n",
"Let's build a useful LLM solution - in a matter of minutes.\n",
"\n",
"By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n",
"\n",
"Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n",
"\n",
"Before starting, you should have completed the setup for [PC](../SETUP-PC.md) or [Mac](../SETUP-mac.md) and you hopefully launched this jupyter lab from within the project root directory, with your environment activated.\n",
"\n",
"## If you're new to Jupyter Lab\n",
"\n",
"Welcome to the wonderful world of Data Science experimentation! Once you've used Jupyter Lab, you'll wonder how you ever lived without it. Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. As you wish, you can add a cell with the + button in the toolbar, and print values of variables, or try out variations. \n",
"\n",
"I've written a notebook called [Guide to Jupyter](Guide%20to%20Jupyter.ipynb) to help you get more familiar with Jupyter Labs, including adding Markdown comments, using `!` to run shell commands, and `tqdm` to show progress.\n",
"\n",
"If you prefer to work in IDEs like VSCode or Pycharm, they both work great with these lab notebooks too. \n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
]
},
{
"cell_type": "markdown",
"id": "6900b2a8-6384-4316-8aaa-5e519fca4254",
"metadata": {},
"source": [
"# Connecting to OpenAI\n",
"\n",
"The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI.\n",
"\n",
"## Troubleshooting if you have problems:\n",
"\n",
"Head over to the [troubleshooting](troubleshooting.ipynb) notebook in this folder for step by step code to identify the root cause and fix it!\n",
"\n",
"If you make a change, try restarting the \"Kernel\" (the python process sitting behind this notebook) by Kernel menu >> Restart Kernel and Clear Outputs of All Cells. Then try this notebook again, starting at the top.\n",
"\n",
"Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n",
"\n",
"Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv()\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()\n",
"\n",
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
"# If it STILL doesn't work (horrors!) then please see the troubleshooting notebook, or try the below line instead:\n",
"# openai = OpenAI(api_key=\"your-key-here-starting-sk-proj-\")"
]
},
{
"cell_type": "markdown",
"id": "442fc84b-0815-4f40-99ab-d9a5da6bda91",
"metadata": {},
"source": [
"# Let's make a quick call to a Frontier model to get started, as a preview!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a58394bf-1e45-46af-9bfd-01e24da6f49a",
"metadata": {},
"outputs": [],
"source": [
"# To give you a preview -- calling OpenAI with these messages is this easy:\n",
"\n",
"message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\":\"user\", \"content\":message}])\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "2aa190e5-cb31-456a-96cc-db109919cd78",
"metadata": {},
"source": [
"## OK onwards with our first project"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c5e793b2-6775-426a-a139-4848291d0463",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
"metadata": {},
"outputs": [],
"source": [
"# Let's try one out. Change the website and add print statements to follow along.\n",
"\n",
"ed = Website(\"https://edwarddonner.com\")\n",
"print(ed.title)\n",
"print(ed.text)"
]
},
{
"cell_type": "markdown",
"id": "6a478a0c-2c53-48ff-869c-4d08199931e1",
"metadata": {},
"source": [
"## Types of prompts\n",
"\n",
"You may know this already - but if not, you will get very familiar with it!\n",
"\n",
"Models like GPT4o have been trained to receive instructions in a particular way.\n",
"\n",
"They expect to receive:\n",
"\n",
"**A system prompt** that tells them what task they are performing and what tone they should use\n",
"\n",
"**A user prompt** -- the conversation starter that they should reply to"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
"metadata": {},
"outputs": [],
"source": [
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
"and provides a short summary, ignoring text that might be navigation related. \\\n",
"Respond in markdown.\""
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
"metadata": {},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26448ec4-5c00-4204-baec-7df91d11ff2e",
"metadata": {},
"outputs": [],
"source": [
"print(user_prompt_for(ed))"
]
},
{
"cell_type": "markdown",
"id": "ea211b5f-28e1-4a86-8e52-c0b7677cadcc",
"metadata": {},
"source": [
"## Messages\n",
"\n",
"The API from OpenAI expects to receive messages in a particular structure.\n",
"Many of the other APIs share this structure:\n",
"\n",
"```\n",
"[\n",
" {\"role\": \"system\", \"content\": \"system message goes here\"},\n",
" {\"role\": \"user\", \"content\": \"user message goes here\"}\n",
"]\n",
"\n",
"To give you a preview, the next 2 cells make a rather simple call - we won't stretch the might GPT (yet!)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f25dcd35-0cd0-4235-9f64-ac37ed9eaaa5",
"metadata": {},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a snarky assistant\"},\n",
" {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21ed95c5-7001-47de-a36d-1d6673b403ce",
"metadata": {},
"outputs": [],
"source": [
"# To give you a preview -- calling OpenAI with system and user messages:\n",
"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47",
"metadata": {},
"source": [
"## And now let's build useful messages for GPT-4o-mini, using a function"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
"metadata": {},
"outputs": [],
"source": [
"# See how this function creates exactly the format above\n",
"\n",
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36478464-39ee-485c-9f3f-6a4e458dbc9c",
"metadata": {},
"outputs": [],
"source": [
"# Try this out, and then try for a few more websites\n",
"\n",
"messages_for(ed)"
]
},
{
"cell_type": "markdown",
"id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0",
"metadata": {},
"source": [
"## Time to bring it together - the API for OpenAI is very simple!"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
"metadata": {},
"outputs": [],
"source": [
"# And now: call the OpenAI API. You will get very familiar with this!\n",
"\n",
"def summarize(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = messages_for(website)\n",
" )\n",
" return response.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
"metadata": {},
"outputs": [],
"source": [
"summarize(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "3d926d59-450e-4609-92ba-2d6f244f1342",
"metadata": {},
"outputs": [],
"source": [
"# A function to display this nicely in the Jupyter output, using markdown\n",
"\n",
"def display_summary(url):\n",
" summary = summarize(url)\n",
" display(Markdown(summary))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3018853a-445f-41ff-9560-d925d1774b2f",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "markdown",
"id": "b3bcf6f4-adce-45e9-97ad-d9a5d7a3a624",
"metadata": {},
"source": [
"# Let's try more websites\n",
"\n",
"Note that this will only work on websites that can be scraped using this simplistic approach.\n",
"\n",
"Websites that are rendered with Javascript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)\n",
"\n",
"Also Websites protected with CloudFront (and similar) may give 403 errors - many thanks Andy J for pointing this out.\n",
"\n",
"But many websites will work just fine!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "45d83403-a24c-44b5-84ac-961449b4008f",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://cnn.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75e9fd40-b354-4341-991e-863ef2e59db7",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://anthropic.com\")"
]
},
{
"cell_type": "markdown",
"id": "36ed9f14-b349-40e9-a42c-b367e77f8bda",
"metadata": {},
"source": [
"## An extra exercise for those who enjoy web scraping\n",
"\n",
"You may notice that if you try `display_summary(\"https://openai.com\")` - it doesn't work! That's because OpenAI has a fancy website that uses Javascript. There are many ways around this that some of you might be familiar with. For example, Selenium is a hugely popular framework that runs a browser behind the scenes, renders the page, and allows you to query it. If you have experience with Selenium, Playwright or similar, then feel free to improve the Website class to use them. In the community-contributions folder, you'll find an example Selenium solution from a student (thank you!)"
]
},
{
"cell_type": "markdown",
"id": "eeab24dc-5f90-4570-b542-b0585aca3eb6",
"metadata": {},
"source": [
"# Sharing your code\n",
"\n",
"I'd love it if you share your code afterwards so I can share it with others! You'll notice that some students have already made changes (including a Selenium implementation) which you will find in the community-contributions folder. If you'd like add your changes to that folder, submit a Pull Request with your new versions in that folder and I'll merge your changes.\n",
"\n",
"If you're not an expert with git (and I am not!) then GPT has given some nice instructions on how to submit a Pull Request. It's a bit of an involved process, but once you've done it once it's pretty clear. As a pro-tip: it's best if you clear the outputs of your Jupyter notebooks (Edit >> Clean outputs of all cells, and then Save) for clean notebooks.\n",
"\n",
"PR instructions courtesy of an AI friend: https://chatgpt.com/share/670145d5-e8a8-8012-8f93-39ee4e248b4c"
]
},
{
"cell_type": "markdown",
"id": "0f62a788",
"metadata": {},
"source": [
"# **Web Scraping for JavaScript Website**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dca2768e",
"metadata": {},
"outputs": [],
"source": [
"# !pip install selenium\n",
"# !pip install undetected-chromedriver"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "682eff74-55c4-4d4b-b267-703edbc293c7",
"metadata": {},
"outputs": [],
"source": [
"import undetected_chromedriver as uc\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"import time\n",
"from bs4 import BeautifulSoup"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "90ca6dd0",
"metadata": {},
"outputs": [],
"source": [
"class WebsiteCrawler:\n",
" def __init__(self, url, wait_time=20, chrome_binary_path=None):\n",
" \"\"\"\n",
" Initialize the WebsiteCrawler using Selenium to scrape JavaScript-rendered content.\n",
" \"\"\"\n",
" self.url = url\n",
" self.wait_time = wait_time\n",
"\n",
" options = uc.ChromeOptions()\n",
" options.add_argument(\"--disable-gpu\")\n",
" options.add_argument(\"--no-sandbox\")\n",
" options.add_argument(\"--disable-dev-shm-usage\")\n",
" options.add_argument(\"--disable-blink-features=AutomationControlled\")\n",
" options.add_argument(\"start-maximized\")\n",
" options.add_argument(\n",
" \"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
" )\n",
" if chrome_binary_path:\n",
" options.binary_location = chrome_binary_path\n",
"\n",
" self.driver = uc.Chrome(options=options)\n",
"\n",
" try:\n",
" # Load the URL\n",
" self.driver.get(url)\n",
"\n",
" # Wait for Cloudflare or similar checks\n",
" time.sleep(10)\n",
"\n",
" # Ensure the main content is loaded\n",
" WebDriverWait(self.driver, self.wait_time).until(\n",
" EC.presence_of_element_located((By.TAG_NAME, \"main\"))\n",
" )\n",
"\n",
" # Extract the main content\n",
" main_content = self.driver.find_element(By.CSS_SELECTOR, \"main\").get_attribute(\"outerHTML\")\n",
"\n",
" # Parse with BeautifulSoup\n",
" soup = BeautifulSoup(main_content, \"html.parser\")\n",
" self.title = self.driver.title if self.driver.title else \"No title found\"\n",
" self.text = soup.get_text(separator=\"\\n\", strip=True)\n",
"\n",
" except Exception as e:\n",
" print(f\"Error occurred: {e}\")\n",
" self.title = \"Error occurred\"\n",
" self.text = \"\"\n",
"\n",
" finally:\n",
" self.driver.quit()\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "947eac30",
"metadata": {},
"outputs": [],
"source": [
"chrome_path = \"C:/Program Files/Google/Chrome/Application/chrome.exe\"\n",
"url = \"https://www.canva.com/\"\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "2cba8c91",
"metadata": {},
"outputs": [],
"source": [
"def new_summary(url, chrome_path):\n",
" web = WebsiteCrawler(url, 30, chrome_path)\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = messages_for(web)\n",
" )\n",
"\n",
" web_summary = response.choices[0].message.content\n",
" \n",
" return display(Markdown(web_summary))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da7f7b16",
"metadata": {},
"outputs": [],
"source": [
"new_summary(url, chrome_path)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "7880ce6a",
"metadata": {},
"outputs": [],
"source": [
"url = \"https://openai.com\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "337b06da",
"metadata": {},
"outputs": [],
"source": [
"new_summary(url, chrome_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a5d69ea",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "llm_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,522 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
"metadata": {},
"source": [
"# Welcome to your first assignment!\n",
"\n",
"Instructions are below. Please give this a try, and look in the solutions folder if you get stuck (or feel free to ask me!)"
]
},
{
"cell_type": "markdown",
"id": "ada885d9-4d42-4d9b-97f0-74fbbbfe93a9",
"metadata": {},
"source": [
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../resources.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#f71;\">Just before we get to the assignment --</h2>\n",
" <span style=\"color:#f71;\">I thought I'd take a second to point you at this page of useful resources for the course. This includes links to all the slides.<br/>\n",
" <a href=\"https://edwarddonner.com/2024/11/13/llm-engineering-resources/\">https://edwarddonner.com/2024/11/13/llm-engineering-resources/</a><br/>\n",
" Please keep this bookmarked, and I'll continue to add more useful links there over time.\n",
" </span>\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "markdown",
"id": "6e9fa1fc-eac5-4d1d-9be4-541b3f2b3458",
"metadata": {},
"source": [
"# HOMEWORK EXERCISE ASSIGNMENT\n",
"\n",
"Upgrade the day 1 project to summarize a webpage to use an Open Source model running locally via Ollama rather than OpenAI\n",
"\n",
"You'll be able to use this technique for all subsequent projects if you'd prefer not to use paid APIs.\n",
"\n",
"**Benefits:**\n",
"1. No API charges - open-source\n",
"2. Data doesn't leave your box\n",
"\n",
"**Disadvantages:**\n",
"1. Significantly less power than Frontier Model\n",
"\n",
"## Recap on installation of Ollama\n",
"\n",
"Simply visit [ollama.com](https://ollama.com) and install!\n",
"\n",
"Once complete, the ollama server should already be running locally. \n",
"If you visit: \n",
"[http://localhost:11434/](http://localhost:11434/)\n",
"\n",
"You should see the message `Ollama is running`. \n",
"\n",
"If not, bring up a new Terminal (Mac) or Powershell (Windows) and enter `ollama serve` \n",
"And in another Terminal (Mac) or Powershell (Windows), enter `ollama pull llama3.2` \n",
"Then try [http://localhost:11434/](http://localhost:11434/) again.\n",
"\n",
"If Ollama is slow on your machine, try using `llama3.2:1b` as an alternative. Run `ollama pull llama3.2:1b` from a Terminal or Powershell, and change the code below from `MODEL = \"llama3.2\"` to `MODEL = \"llama3.2:1b\"`"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display"
]
},
{
"cell_type": "raw",
"id": "07e106bd-10c5-4365-b85b-397b5f059656",
"metadata": {},
"source": [
"# Constants\n",
"\n",
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
"MODEL = \"llama3.2\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "dac0a679-599c-441f-9bf2-ddc73d35b940",
"metadata": {},
"outputs": [],
"source": [
"# Create a messages list using the same format that we used for OpenAI\n",
"\n",
"messages = [\n",
" {\"role\": \"user\", \"content\": \"Describe some of the business applications of Generative AI\"}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "7bb9c624-14f0-4945-a719-8ddb64f66f47",
"metadata": {},
"outputs": [],
"source": [
"payload = {\n",
" \"model\": MODEL,\n",
" \"messages\": messages,\n",
" \"stream\": False\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "42b9f644-522d-4e05-a691-56e7658c0ea9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generative AI (Artificial Intelligence) has numerous business applications across various industries. Here are some examples:\n",
"\n",
"1. **Content Generation**: Generative AI can create high-quality content such as articles, social media posts, product descriptions, and more. This can help businesses save time and resources on content creation.\n",
"2. **Product Design**: Generative AI can be used to design new products, such as fashion items, jewelry, or electronics. It can also generate 3D models and prototypes, reducing the need for manual design and prototyping.\n",
"3. **Image and Video Generation**: Generative AI can create realistic images and videos that can be used in marketing campaigns, advertising, and social media. This can help businesses create engaging visual content without requiring extensive photography or videography skills.\n",
"4. **Chatbots and Virtual Assistants**: Generative AI can power chatbots and virtual assistants that provide customer support, answer frequently asked questions, and even engage in basic conversations.\n",
"5. **Predictive Maintenance**: Generative AI can analyze sensor data from machines and predict when maintenance is needed, reducing downtime and increasing efficiency.\n",
"6. **Personalized Recommendations**: Generative AI can analyze customer behavior and preferences to generate personalized product recommendations, improving the overall shopping experience.\n",
"7. **Customer Segmentation**: Generative AI can help businesses segment their customers based on their behavior, demographics, and preferences, enabling targeted marketing campaigns.\n",
"8. **Automated Writing Assistance**: Generative AI can assist writers with ideas, suggestions, and even full-text writing, helping to boost productivity and creativity.\n",
"9. **Data Analysis and Visualization**: Generative AI can analyze large datasets and generate insights, visualizations, and predictions that can inform business decisions.\n",
"10. **Creative Collaboration**: Generative AI can collaborate with human creatives, such as artists, designers, and writers, to generate new ideas, concepts, and content.\n",
"\n",
"Some specific industries where Generative AI is being applied include:\n",
"\n",
"1. **Marketing and Advertising**: generating personalized ads, content, and messaging.\n",
"2. **Finance and Banking**: automating financial analysis, risk assessment, and customer service.\n",
"3. **Healthcare**: generating medical images, analyzing patient data, and predicting disease outcomes.\n",
"4. **Manufacturing and Supply Chain**: optimizing production workflows, predicting demand, and identifying potential bottlenecks.\n",
"5. **Education**: creating personalized learning experiences, grading assignments, and developing educational content.\n",
"\n",
"These are just a few examples of the many business applications of Generative AI. As the technology continues to evolve, we can expect to see even more innovative uses across various industries.\n"
]
}
],
"source": [
"response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
"print(response.json()['message']['content'])"
]
},
{
"cell_type": "markdown",
"id": "6a021f13-d6a1-4b96-8e18-4eae49d876fe",
"metadata": {},
"source": [
"# Introducing the ollama package\n",
"\n",
"And now we'll do the same thing, but using the elegant ollama python package instead of a direct HTTP call.\n",
"\n",
"Under the hood, it's making the same call as above to the ollama server running at localhost:11434"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7745b9c4-57dc-4867-9180-61fa5db55eb8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generative AI has numerous business applications across various industries. Here are some examples:\n",
"\n",
"1. **Content Generation**: Generative AI can be used to generate high-quality content such as articles, social media posts, product descriptions, and more. This can save time and resources for businesses that need to produce a large volume of content.\n",
"2. **Product Design**: Generative AI can be used to design new products, such as furniture, electronics, and other consumer goods. It can also help optimize product designs by generating multiple versions and selecting the most suitable one based on various criteria.\n",
"3. **Marketing Automation**: Generative AI can be used to create personalized marketing campaigns, such as email marketing automation, social media ads, and more. This can help businesses tailor their marketing efforts to specific customer segments and improve engagement rates.\n",
"4. **Image and Video Editing**: Generative AI can be used to edit images and videos, such as removing background noise, correcting color casts, and enhancing video quality. This can save time and resources for businesses that need to create high-quality visual content.\n",
"5. **Chatbots and Virtual Assistants**: Generative AI can be used to create chatbots and virtual assistants that can understand natural language and respond accordingly. This can help businesses provide better customer service and improve user experience.\n",
"6. **Predictive Analytics**: Generative AI can be used to analyze large datasets and generate predictive models that can forecast future trends and behaviors. This can help businesses make data-driven decisions and stay ahead of the competition.\n",
"7. **Customer Segmentation**: Generative AI can be used to segment customers based on their behavior, demographics, and preferences. This can help businesses tailor their marketing efforts and improve customer engagement.\n",
"8. **Language Translation**: Generative AI can be used to translate languages in real-time, which can help businesses communicate with international clients and customers more effectively.\n",
"9. **Music Composition**: Generative AI can be used to compose music for various applications such as advertising, film scoring, and video game soundtracks.\n",
"10. **Financial Modeling**: Generative AI can be used to create financial models that can predict future revenue streams, costs, and other financial metrics. This can help businesses make more accurate predictions and inform better investment decisions.\n",
"\n",
"Some of the industries that are already leveraging generative AI include:\n",
"\n",
"* E-commerce\n",
"* Healthcare\n",
"* Finance\n",
"* Marketing\n",
"* Education\n",
"* Entertainment\n",
"* Manufacturing\n",
"\n",
"These applications have the potential to transform various business processes, improve customer experiences, and drive innovation in various sectors.\n"
]
}
],
"source": [
"import ollama\n",
"\n",
"response = ollama.chat(model=MODEL, messages=messages)\n",
"print(response['message']['content'])"
]
},
{
"cell_type": "markdown",
"id": "a4704e10-f5fb-4c15-a935-f046c06fb13d",
"metadata": {},
"source": [
"## Alternative approach - using OpenAI python library to connect to Ollama"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "23057e00-b6fc-4678-93a9-6b31cb704bff",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generative AI has numerous business applications across various industries, transforming the way companies operate, create products, and interact with customers. Some key applications include:\n",
"\n",
"1. **Content Generation**: Automate content creation for marketing materials, such as blog posts, product descriptions, social media posts, and more, using Generative AI-powered tools.\n",
"2. **Product Design and Prototyping**: Use Generative AI to design new products, furniture, or other innovative solutions, reducing design time and costs while increasing creativity.\n",
"3. **Customer Experience (CX) Tools**: Leverage Generative AI to create personalized customer experiences, such as chatbots that can respond to customer queries and provide tailored recommendations.\n",
"4. **Predictive Maintenance**: Use Generative AI to analyze sensor data, identify potential issues, and predict maintenance needs for equipment, reducing downtime and increasing overall efficiency.\n",
"5. **Personalized Marketing**: Use Generative AI to create targeted marketing campaigns based on individual customer preferences, behaviors, and demographics.\n",
"6. **Content Optimization**: Utilize Generative AI to optimize content for better performance in search engine results pages (SERPs), ensuring improved visibility and traffic.\n",
"7. **Brand Storytelling**: Automate the creation of brand stories, taglines, and overall brand narrative using Generative AI-powered tools.\n",
"8. **Financial Modeling and Forecasting**: Use Generative AI to create financial models, forecasts, and predictions for businesses, helping them make data-driven decisions.\n",
"9. **Supply Chain Optimization**: Leverage Generative AI to optimize supply chain operations, predicting demand, reducing inventory levels, and streamlining logistics.\n",
"10. **Automated Transcription and Translation**: Use Generative AI to automate the transcription of audio and video files into written text, as well as translate materials across languages.\n",
"11. **Digital Asset Management**: Utilize Generative AI to manage digital assets, such as images, videos, and documents, and automatically generate metadata for easy search and retrieval.\n",
"12. **Chatbots and Virtual Assistants**: Create more advanced chatbots using Generative AI that can understand context, emotions, and intent, providing better customer service experiences.\n",
"\n",
"In healthcare, Generative AI is being applied to:\n",
"\n",
"1. Medical Imaging Analysis\n",
"2. Personalized Medicine\n",
"3. Patient Data Analysis\n",
"\n",
"In education, Generative AI is used in:\n",
"\n",
"1. Adaptive Learning Systems\n",
"2. Automated Grading and Feedback\n",
"\n",
"Generative AI has numerous applications across various industries, from creative content generation to predictive maintenance and supply chain optimization.\n",
"\n",
"Keep in mind that these are just a few examples of the many business applications of Generative AI as this technology continues to evolve at a rapid pace.\n"
]
}
],
"source": [
"# There's actually an alternative approach that some people might prefer\n",
"# You can use the OpenAI client python library to call Ollama:\n",
"\n",
"from openai import OpenAI\n",
"ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
"\n",
"response = ollama_via_openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=messages\n",
")\n",
"\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "1622d9bb-5c68-4d4e-9ca4-b492c751f898",
"metadata": {},
"source": [
"# NOW the exercise for you\n",
"\n",
"Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI; use either of the above approaches."
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "de923314-a427-4199-b1f9-0e60f85114c3",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"\n",
"# A class to represent a Webpage\n",
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "0cedada6-adc6-40dc-bdf3-bc8a3b6b3826",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Home\n",
"Outsmart\n",
"An arena that pits LLMs against each other in a battle of diplomacy and deviousness\n",
"About\n",
"Posts\n",
"Well, hi there.\n",
"Im Ed. I like writing code and experimenting with LLMs, and hopefully youre here because you do too. I also enjoy DJing (but Im badly out of practice), amateur electronic music production (\n",
"very\n",
"amateur) and losing myself in\n",
"Hacker News\n",
", nodding my head sagely to things I only half understand.\n",
"Im the co-founder and CTO of\n",
"Nebula.io\n",
". Were applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. Im previously the founder and CEO of AI startup untapt,\n",
"acquired in 2021\n",
".\n",
"We work with groundbreaking, proprietary LLMs verticalized for talent, weve\n",
"patented\n",
"our matching model, and our award-winning platform has happy customers and tons of press coverage.\n",
"Connect\n",
"with me for more!\n",
"November 13, 2024\n",
"Mastering AI and LLM Engineering Resources\n",
"October 16, 2024\n",
"From Software Engineer to AI Data Scientist resources\n",
"August 6, 2024\n",
"Outsmart LLM Arena a battle of diplomacy and deviousness\n",
"June 26, 2024\n",
"Choosing the Right LLM: Toolkit and Resources\n",
"Navigation\n",
"Home\n",
"Outsmart\n",
"An arena that pits LLMs against each other in a battle of diplomacy and deviousness\n",
"About\n",
"Posts\n",
"Get in touch\n",
"ed [at] edwarddonner [dot] com\n",
"www.edwarddonner.com\n",
"Follow me\n",
"LinkedIn\n",
"Twitter\n",
"Facebook\n",
"Subscribe to newsletter\n",
"Type your email…\n",
"Subscribe\n"
]
}
],
"source": [
"# Let's try one out. Change the website and add print statements to follow along.\n",
"\n",
"web_res = Website(\"https://edwarddonner.com\")\n",
"print(web_res.text)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "64d26055-756b-4095-a1d1-298fdf4fd8f1",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Constants\n",
"\n",
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
"MODEL = \"llama3.2\"\n"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "65b08550-7506-415f-8612-e2395d6e145d",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
"\n",
"system_prompt = \"You are an helper that assist user to provide crisp summary\\\n",
"of the website they pass in, respond with key points\"\n",
"\n",
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"If it includes news or announcements, then summarize these too with start bulletin.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt\n"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "36a0a2d0-f07a-40ac-a065-b713cdd5c028",
"metadata": {},
"outputs": [],
"source": [
"# See how this function creates exactly the format above\n",
"\n",
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]\n"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "8c2b20ea-6a8e-41c9-be3b-f24a5b29e8de",
"metadata": {},
"outputs": [],
"source": [
"#website search\n",
"\n",
"web_msg=Website(\"https://www.cricbuzz.com/cricket-match-squads/91796/aus-vs-ind-3rd-test-india-tour-of-australia-2024-25\")\n",
"messages=messages_for(web_msg)\n",
"\n",
"payload = {\n",
" \"model\": MODEL,\n",
" \"messages\": messages,\n",
" \"stream\": False\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "e5636b3b-7763-4f9c-ab18-88aa25b50de6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"**Summary of the Website**\n",
"=========================\n",
"\n",
"* The website provides live updates and information about the 3rd Test match between Australia and India as part of India's tour of Australia in the 2024-25 season.\n",
"* It includes news, scores, stats, and analysis from the match.\n",
"* The website is affiliated with Cricbuzz.com, a popular online cricket platform.\n",
"\n",
"**News and Announcements**\n",
"==========================\n",
"\n",
"* **Rashid Khan to miss the rest of the series**: Australian all-rounder Mitchell Marsh's teammate Rashid Khan has been ruled out of the remaining Tests due to a knee injury.\n",
"* **Bumrah to feature in the third Test**: Indian fast bowler Jasprit Bumrah is expected to return for the third Test, which starts on January 5 at the Sydney Cricket Ground.\n"
]
}
],
"source": [
"#Using Ollama to run it in the local\n",
"response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
"print(response.json()['message']['content'])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,513 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "a98030af-fcd1-4d63-a36e-38ba053498fa",
"metadata": {},
"source": [
"# A full business solution\n",
"\n",
"## Now we will take our project from Day 1 to the next level\n",
"\n",
"### BUSINESS CHALLENGE:\n",
"\n",
"Create a product that builds a Brochure for a company to be used for prospective clients, investors and potential recruits.\n",
"\n",
"We will be provided a company name and their primary website.\n",
"\n",
"See the end of this notebook for examples of real-world business applications.\n",
"\n",
"And remember: I'm always available if you have problems or ideas! Please do reach out."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d5b08506-dc8b-4443-9201-5f1848161363",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt\n",
"\n",
"import os\n",
"import requests\n",
"import json\n",
"from typing import List\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fc5d8880-f2ee-4c06-af16-ecbc0262af61",
"metadata": {},
"outputs": [],
"source": [
"# Initialize and constants\n",
"\n",
"load_dotenv()\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n",
" print(\"API key looks good so far\")\n",
"else:\n",
" print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
" \n",
"MODEL = 'gpt-4o-mini'\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "106dd65e-90af-4ca8-86b6-23a41840645b",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
" \"\"\"\n",
" A utility class to represent a Website that we have scraped, now with links\n",
" \"\"\"\n",
"\n",
" def __init__(self, url):\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" self.body = response.content\n",
" soup = BeautifulSoup(self.body, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" if soup.body:\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
" else:\n",
" self.text = \"\"\n",
" links = [link.get('href') for link in soup.find_all('a')]\n",
" self.links = [link for link in links if link]\n",
"\n",
" def get_contents(self):\n",
" return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e30d8128-933b-44cc-81c8-ab4c9d86589a",
"metadata": {},
"outputs": [],
"source": [
"ed = Website(\"https://edwarddonner.com\")\n",
"ed.links"
]
},
{
"cell_type": "markdown",
"id": "1771af9c-717a-4fca-bbbe-8a95893312c3",
"metadata": {},
"source": [
"## First step: Have GPT-4o-mini figure out which links are relevant\n",
"\n",
"### Use a call to gpt-4o-mini to read the links on a webpage, and respond in structured JSON. \n",
"It should decide which links are relevant, and replace relative links such as \"/about\" with \"https://company.com/about\". \n",
"We will use \"one shot prompting\" in which we provide an example of how it should respond in the prompt.\n",
"\n",
"This is an excellent use case for an LLM, because it requires nuanced understanding. Imagine trying to code this without LLMs by parsing and analyzing the webpage - it would be very hard!\n",
"\n",
"Sidenote: there is a more advanced technique called \"Structured Outputs\" in which we require the model to respond according to a spec. We cover this technique in Week 8 during our autonomous Agentic AI project."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "6957b079-0d96-45f7-a26a-3487510e9b35",
"metadata": {},
"outputs": [],
"source": [
"link_system_prompt = \"You are provided with a list of links found on a webpage. \\\n",
"You are able to decide which of the links would be most relevant to include in a brochure about the company, \\\n",
"such as links to an About page, or a Company page, or Careers/Jobs pages.\\n\"\n",
"link_system_prompt += \"You should respond in JSON as in this example:\"\n",
"link_system_prompt += \"\"\"\n",
"{\n",
" \"links\": [\n",
" {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
" {\"type\": \"careers page\": \"url\": \"https://another.full.url/careers\"}\n",
" ]\n",
"}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b97e4068-97ed-4120-beae-c42105e4d59a",
"metadata": {},
"outputs": [],
"source": [
"print(link_system_prompt)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8e1f601b-2eaf-499d-b6b8-c99050c9d6b3",
"metadata": {},
"outputs": [],
"source": [
"def get_links_user_prompt(website):\n",
" user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n",
" user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n",
"Do not include Terms of Service, Privacy, email links.\\n\"\n",
" user_prompt += \"Links (some might be relative links):\\n\"\n",
" user_prompt += \"\\n\".join(website.links)\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6bcbfa78-6395-4685-b92c-22d592050fd7",
"metadata": {},
"outputs": [],
"source": [
"print(get_links_user_prompt(ed))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "a29aca19-ca13-471c-a4b4-5abbfa813f69",
"metadata": {},
"outputs": [],
"source": [
"def get_links(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
" {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n",
" ],\n",
" response_format={\"type\": \"json_object\"}\n",
" )\n",
" result = response.choices[0].message.content\n",
" return json.loads(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74a827a0-2782-4ae5-b210-4a242a8b4cc2",
"metadata": {},
"outputs": [],
"source": [
"# Anthropic has made their site harder to scrape, so I'm using HuggingFace..\n",
"\n",
"huggingface = Website(\"https://huggingface.co\")\n",
"huggingface.links"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d3d583e2-dcc4-40cc-9b28-1e8dbf402924",
"metadata": {},
"outputs": [],
"source": [
"get_links(\"https://huggingface.co\")"
]
},
{
"cell_type": "markdown",
"id": "0d74128e-dfb6-47ec-9549-288b621c838c",
"metadata": {},
"source": [
"## Second step: make the brochure!\n",
"\n",
"Assemble all the details into another prompt to GPT4-o"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "85a5b6e2-e7ef-44a9-bc7f-59ede71037b5",
"metadata": {},
"outputs": [],
"source": [
"def get_all_details(url):\n",
" result = \"Landing page:\\n\"\n",
" result += Website(url).get_contents()\n",
" links = get_links(url)\n",
" print(\"Found links:\", links)\n",
" for link in links[\"links\"]:\n",
" result += f\"\\n\\n{link['type']}\\n\"\n",
" result += Website(link[\"url\"]).get_contents()\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5099bd14-076d-4745-baf3-dac08d8e5ab2",
"metadata": {},
"outputs": [],
"source": [
"print(get_all_details(\"https://huggingface.co\"))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "9b863a55-f86c-4e3f-8a79-94e24c1a8cf2",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"Include details of company culture, customers and careers/jobs if you have the information.\"\n",
"\n",
"# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n",
"\n",
"# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"# Include details of company culture, customers and careers/jobs if you have the information.\"\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "6ab83d92-d36b-4ce0-8bcc-5bb4c2f8ff23",
"metadata": {},
"outputs": [],
"source": [
"def get_brochure_user_prompt(company_name, url):\n",
" user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
" user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
" user_prompt += get_all_details(url)\n",
" user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd909e0b-1312-4ce2-a553-821e795d7572",
"metadata": {},
"outputs": [],
"source": [
"print(get_brochure_user_prompt(\"HuggingFace\", \"https://huggingface.co\"))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "e44de579-4a1a-4e6a-a510-20ea3e4b8d46",
"metadata": {},
"outputs": [],
"source": [
"def create_brochure(company_name, url):\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" )\n",
" result = response.choices[0].message.content\n",
" display(Markdown(result))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e093444a-9407-42ae-924a-145730591a39",
"metadata": {},
"outputs": [],
"source": [
"create_brochure(\"HuggingFace\", \"https://huggingface.com\")"
]
},
{
"cell_type": "markdown",
"id": "61eaaab7-0b47-4b29-82d4-75d474ad8d18",
"metadata": {},
"source": [
"## Finally - a minor improvement\n",
"\n",
"With a small adjustment, we can change this so that the results stream back from OpenAI,\n",
"with the familiar typewriter animation"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "51db0e49-f261-4137-aabe-92dd601f7725",
"metadata": {},
"outputs": [],
"source": [
"def stream_brochure(company_name, url):\n",
" stream = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" stream=True\n",
" )\n",
" \n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id=True)\n",
" for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56bf0ae3-ee9d-4a72-9cd6-edcac67ceb6d",
"metadata": {},
"outputs": [],
"source": [
"stream_brochure(\"HuggingFace\", \"https://huggingface.co\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "87bd1188",
"metadata": {},
"outputs": [],
"source": [
"stream_brochure(\"HuggingFace\", \"https://huggingface.co\")"
]
},
{
"cell_type": "markdown",
"id": "a9e7375d",
"metadata": {},
"source": [
"## **Multi-lingual with Multi-Tone in Desire Format**"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "af5c959f",
"metadata": {},
"outputs": [],
"source": [
"def multi_lingual_stream_brochure(company_name, url, language, tone):\n",
"\n",
" system_prompt = f\"\"\"\n",
"You are an assistant that analyzes the contents of several relevant pages from a company website and creates a visually appealing and professional short brochure for prospective customers, investors, and recruits. \n",
"The brochure should be written in {language} and use a {tone.lower()} tone throughout.\n",
"\n",
"The brochure should follow this structure (in {language}):\n",
"\n",
"1. **Front Cover**:\n",
" - Prominently display the company name as Title.\n",
" - Include a compelling headline or tagline.\n",
" - Add something engaging relevant to the companys mission.\n",
"\n",
"2. **About Us**:\n",
" - Provide a brief introduction to the company.\n",
" - State the companys core mission and vision.\n",
" - Mention the founding story or key milestones.\n",
"\n",
"3. **What We Offer**:\n",
" - Summarize the company's products, services, or solutions.\n",
" - Highlight benefits or unique selling points.\n",
" - Include testimonials or case studies if available.\n",
"\n",
"4. **Our Culture**:\n",
" - Outline the companys key values or guiding principles.\n",
" - Describe the workplace environment (e.g., innovation-driven, inclusive, collaborative).\n",
" - Highlight community engagement or CSR initiatives.\n",
"\n",
"5. **Who We Serve**:\n",
" - Describe the target customers or industries served.\n",
" - Mention notable clients or partners.\n",
" - Include testimonials or endorsements from customers.\n",
"\n",
"6. **Join Us**:\n",
" - Detail career or internship opportunities.\n",
" - Highlight benefits, career growth, or training opportunities.\n",
" - Provide direct links or steps to apply.\n",
"\n",
"7. **Contact Us**:\n",
" - Provide the companys address, phone number, and email.\n",
" - Include links to social media platforms.\n",
" - Add a link to the companys website.\n",
"\n",
"8. **Closing Note**:\n",
" - End with a thank-you message or an inspirational note for the reader.\n",
" - Add a call-to-action (e.g., “Get in touch today!” or “Explore more on our website”).\n",
"\n",
"Ensure the content is concise, engaging, visually clear, and tailored to the target audience. Use headings and subheadings to make the brochure easy to navigate. Include links and contact information wherever applicable.\n",
"\"\"\"\n",
"\n",
"\n",
" \n",
" stream = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" stream=True\n",
" )\n",
" \n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id=True)\n",
" for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "744bfc05",
"metadata": {},
"outputs": [],
"source": [
"\n",
"multi_lingual_stream_brochure(\"OpenAI\", \"https://openai.com/\", \"Urdu\", \"humorous, entertaining, jokey\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llm_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,10 @@
John Doe
Software Engineer
Experience:
- Developed web applications using Python and JavaScript.
- Collaborated with cross-functional teams to deliver projects on time.
Education:
- B.S. in Computer Science from XYZ University.
Skills:
- Python, JavaScript, React, SQL

View File

@@ -0,0 +1,248 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5",
"metadata": {},
"source": [
"# End of week 1 exercise\n",
"\n",
"To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n",
"and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "c1070317-3ed9-4659-abe3-828943230e03",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"import os\n",
"import requests\n",
"import json \n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI\n",
"import ollama\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
"metadata": {},
"outputs": [],
"source": [
"# constants\n",
"\n",
"MODEL_GPT = 'gpt-4o-mini'\n",
"MODEL_LLAMA = 'llama3.2'\n",
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "0bb65a08-9090-434a-b99d-5659a370cfbc",
"metadata": {},
"outputs": [],
"source": [
"# Prompts\n",
"\n",
"system_prompt = \"You are a tutor and helps with the user questions in detail with markdown respond with key point \\\n",
"considering the recent development around the world, keep the response in most appropriate tone \\n\"\n",
"\n",
"system_prompt += \"Some of Examples are\"\n",
"system_prompt += \"\"\"\n",
"{\"question\": \"1+1?\", \"response\": \"2\"},\n",
"{\"question\": \"why we shouls learn LLM Models?\", \"response\": \" Learning about Large Language Models (LLMs) is important because they are a rapidly evolving technology with the potential to significantly impact various industries, offering advanced capabilities in text generation, translation, information retrieval, and more, which can be valuable for professionals across diverse fields, allowing them to enhance their work and gain a competitive edge by understanding and utilizing these powerful language processing tools.\\ \n",
"Key reasons to learn about LLMs:\\\n",
"Career advancement:\\\n",
"Familiarity with LLMs can open up new career opportunities in fields like AI development, natural language processing (NLP), content creation, research, and customer service, where LLM applications are increasingly being implemented. \\\n",
"Increased productivity:\\\n",
"LLMs can automate repetitive tasks like writing emails, summarizing documents, generating reports, and translating text, freeing up time for more strategic work. \\\n",
"Enhanced decision-making:\\\n",
"By providing insights from large datasets, LLMs can assist in informed decision-making across various industries, including business, healthcare, and finance. \\\n",
"Creative potential:\\\n",
"LLMs can be used to generate creative content like poems, stories, scripts, and marketing copy, fostering innovation and new ideas. \\\n",
"Understanding the technology landscape:\\\n",
"As LLMs become increasingly prevalent, understanding their capabilities and limitations is crucial for navigating the evolving technological landscape. \\\n",
"What is a large language model (LLM)? - Cloudflare\\\n",
"A large language model (LLM) is a type of artificial intelligence (AI) program that can recognize and generate text, among other t...\\\n",
" \"},\n",
"{\"question\": \"what is the future of AI?\", \"response\": \"AI is predicted to grow increasingly pervasive as technology develops, revolutionising sectors including healthcare, banking, and transportation\"},\n",
"\"\"\"\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "a8d7923c-5f28-4c30-8556-342d7c8497c1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"API key looks good so far\n"
]
}
],
"source": [
"# set up environment\n",
"load_dotenv()\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n",
" print(\"API key looks good so far\")\n",
"else:\n",
" print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
" \n",
"MODEL = 'gpt-4o-mini'\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "3f0d0137-52b0-47a8-81a8-11a90a010798",
"metadata": {},
"outputs": [],
"source": [
"# here is the question; type over this to ask something new\n",
"\n",
"user_question = \"\"\"\n",
"How important it is for a Data Engineers to learn LLM, Considering the evolution of AI now a days?.\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "60ce7000-a4a5-4cce-a261-e75ef45063b4",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"{\"question\": \"How important is it for Data Engineers to learn LLMs?\", \"response\": \"The importance of Data Engineers learning about Large Language Models (LLMs) cannot be overstated, especially given the rapid evolution of AI and its applications across various domains. Here's why this knowledge is essential:\n",
"\n",
"### Key Reasons for Data Engineers to Learn about LLMs:\n",
"\n",
"1. **Integration of AI in Data Pipelines:**\n",
" - As organizations increasingly adopt AI-driven solutions, Data Engineers will need to integrate LLMs into data pipelines for tasks such as text processing, feature extraction, and sentiment analysis.\n",
"\n",
"2. **Understanding Data Requirements:**\n",
" - LLMs require substantial and specific datasets for optimal performance. Knowledge of these requirements will help Data Engineers curate, preprocess, and manage data more effectively.\n",
"\n",
"3. **Enhanced Data Quality:**\n",
" - Data Engineers play a crucial role in ensuring data quality. Understanding LLMs can guide them in implementing effective validation checks and enhancing the data used for training these models.\n",
"\n",
"4. **Collaboration with Data Scientists:**\n",
" - Data Engineers are essential collaborators with Data Scientists. A solid grasp of LLMs will enable them to facilitate better communication and cooperation in model deployment and optimization.\n",
"\n",
"5. **Innovation in Product Development:**\n",
" - Familiarity with LLMs will enable Data Engineers to contribute innovative ideas for new products or features that leverage language processing capabilities, leading to enhanced user experiences.\n",
"\n",
"6. **Staying Current with Industry Trends:**\n",
" - The AI landscape is rapidly changing. Learning about LLMs keeps Data Engineers abreast of current trends and technologies, ensuring they remain competitive in the job market and valuable to their organizations.\n",
"\n",
"7. **Ethical and Responsible AI:**\n",
" - Understanding LLMs involves awareness of their ethical considerations, such as bias and misuse. Data Engineers can advocate for responsible AI practices within their organizations by being educated on these issues.\n",
"\n",
"8. **Scalability Considerations:**\n",
" - Data Engineers will need to design systems that can scale efficiently, especially when dealing with the substantial computational resources required for training and deploying LLMs.\n",
"\n",
"### Conclusion:\n",
"In summary, learning about LLMs is crucial for Data Engineers as it not only enhances their skill set but also positions them to contribute meaningfully to AI initiatives within their organizations. Embracing this knowledge will ultimately drive innovation and efficiency in their data-driven projects.\"}"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Get gpt-4o-mini to answer, with streaming\n",
"def ask_tutor(question):\n",
" stream = openai.chat.completions.create(\n",
" model=MODEL_GPT,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": question},\n",
" {\"role\": \"user\", \"content\": system_prompt}\n",
" ],\n",
" stream=True\n",
" )\n",
" \n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id=True)\n",
" for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)\n",
"\n",
"# call the gpt-4o-mini to answer with streaming\n",
"ask_tutor(user_question)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538",
"metadata": {},
"outputs": [],
"source": [
"# Get Llama 3.2 to answer\n",
"messages = [\n",
" {\"role\": \"user\", \"content\": user_question}\n",
"]\n",
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
"payload = {\n",
" \"model\": MODEL_LLAMA,\n",
" \"messages\": messages,\n",
" \"stream\": True\n",
" }\n",
"\n",
"response = ollama.chat(model=MODEL_LLAMA, messages=messages)\n",
"reply = response['message']['content']\n",
"display(Markdown(reply))\n",
"\n",
"# # Process the response stream\n",
"# for line in response.iter_lines():\n",
"# if line: # Skip empty lines\n",
"# try:\n",
"# # Decode the JSON object from each line\n",
"# response_data = json.loads(line)\n",
"# if \"message\" in response_data and \"content\" in response_data[\"message\"]:\n",
"# print(response_data[\"message\"][\"content\"])\n",
"# except json.JSONDecodeError as e:\n",
"# print(f\"Failed to decode JSON: {e}\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,332 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5",
"metadata": {},
"source": [
"# **End of week 1 exercise**\n",
"\n",
"To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n",
"and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
]
},
{
"cell_type": "markdown",
"id": "c70e5ab1",
"metadata": {},
"source": [
"## **1. Get a response from your favorite AI Tutor** "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "c1070317-3ed9-4659-abe3-828943230e03",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from openai import OpenAI\n",
"import json\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display, update_display"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "65dace69",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv()\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if api_key and api_key.startswith('sk-proj-') and len(api_key) > 10:\n",
" print(\"API key looks good so far\")\n",
"else:\n",
" print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
"metadata": {},
"outputs": [],
"source": [
"# constants\n",
"\n",
"MODEL_GPT = 'gpt-4o-mini'\n",
"MODEL_LLAMA = 'llama3.2'\n",
"\n",
"openai = OpenAI()\n",
"\n",
"ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "3673d863",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"\"\"You are the software engnieer, phd in mathematics, machine learning engnieer, and other topics\"\"\"\n",
"system_prompt += \"\"\"\n",
"When responding, always use Markdown for formatting. For any code, use well-structured code blocks with syntax highlighting,\n",
"For instance:\n",
"```python\n",
"\n",
"sample_list = [for i in range(10)]\n",
"```\n",
"Another example\n",
"```javascript\n",
" function displayMessage() {\n",
" alert(\"Hello, welcome to JavaScript!\");\n",
" }\n",
"\n",
"```\n",
"\n",
"Break down explanations into clear, numbered steps for better understanding. \n",
"Highlight important terms using inline code formatting (e.g., `function_name`, `variable`).\n",
"Provide examples for any concepts and ensure all examples are concise, clear, and relevant.\n",
"Your goal is to create visually appealing, easy-to-read, and informative responses.\n",
"\n",
"\"\"\"\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "1df78d41",
"metadata": {},
"outputs": [],
"source": [
"def tutor_user_prompt(question):\n",
" # Ensure the question is properly appended to the user prompt.\n",
" user_prompt = (\n",
" \"Please carefully explain the following question in a step-by-step manner for clarity:\\n\\n\"\n",
" )\n",
" user_prompt += question\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "6dccbccb",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"def askTutor(question, MODEL):\n",
" # Generate the user prompt dynamically.\n",
" user_prompt = tutor_user_prompt(question)\n",
" \n",
" # OpenAI API call to generate response.\n",
" if MODEL == 'gpt-4o-mini':\n",
" print(f'You are getting response from {MODEL}')\n",
" stream = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
" ],\n",
" stream=True\n",
" )\n",
" else:\n",
" MODEL == 'llama3.2'\n",
" print(f'You are getting response from {MODEL}')\n",
" stream = ollama_via_openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
" ],\n",
" stream=True\n",
" )\n",
"\n",
" # Initialize variables for response processing.\n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id=True)\n",
" \n",
" # Process the response stream and update display dynamically.\n",
" for chunk in stream:\n",
" # Safely access the content attribute.\n",
" response_chunk = getattr(chunk.choices[0].delta, \"content\", \"\")\n",
" if response_chunk: # Check if response_chunk is not None or empty\n",
" response += response_chunk\n",
" # No replacement of Markdown formatting here!\n",
" update_display(Markdown(response), display_id=display_handle.display_id)\n"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "a8d7923c-5f28-4c30-8556-342d7c8497c1",
"metadata": {},
"outputs": [],
"source": [
"# here is the question; type over this to ask something new\n",
"\n",
"question = \"\"\"\n",
"Please explain what this code does and why:\n",
"yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f0d0137-52b0-47a8-81a8-11a90a010798",
"metadata": {},
"outputs": [],
"source": [
"askTutor(question=question, MODEL=MODEL_GPT)"
]
},
{
"cell_type": "markdown",
"id": "b79f9479",
"metadata": {},
"source": [
"## **2. Using both LLMs collaboratively approach**"
]
},
{
"cell_type": "markdown",
"id": "80e3c8f5",
"metadata": {},
"source": [
"- I thought about like similar the idea of a RAG (Retrieval-Augmented Generation) approach, is an excellent idea to improve responses by refining the user query and producing a polished, detailed final answer. Two LLM talking each other its cool!!! Here's how we can implement this:\n",
"\n",
"**Updated Concept:**\n",
"1. Refine Query with Ollama:\n",
" - Use Ollama to refine the raw user query into a well-structured prompt.\n",
" - This is especially helpful when users input vague or poorly structured queries.\n",
"2. Generate Final Response with GPT:\n",
" - Pass the refined prompt from Ollama to GPT to generate the final, detailed, and polished response.\n",
"3. Return the Combined Output:\n",
" - Combine the input, refined query, and the final response into a single display to ensure clarity."
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "60f5ac2d",
"metadata": {},
"outputs": [],
"source": [
"def refine_with_ollama(raw_question):\n",
" \"\"\"\n",
" Use Ollama to refine the user's raw question into a well-structured prompt.\n",
" \"\"\"\n",
" print(\"Refining the query using Ollama...\")\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a helpful assistant. Refine and structure the following user input.\"},\n",
"\n",
" {\"role\": \"user\", \"content\": raw_question},\n",
" ]\n",
" response = ollama_via_openai.chat.completions.create(\n",
" model=MODEL_LLAMA,\n",
" messages=messages,\n",
" stream=False # Non-streamed refinement\n",
" )\n",
" refined_query = response.choices[0].message.content\n",
" return refined_query"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "2aa4c9f6",
"metadata": {},
"outputs": [],
"source": [
"def ask_with_ollama_and_gpt(raw_question):\n",
" \"\"\"\n",
" Use Ollama to refine the user query and GPT to generate the final response.\n",
" \"\"\"\n",
" # Step 1: Refine the query using Ollama\n",
" refined_query = refine_with_ollama(raw_question)\n",
" \n",
" # Step 2: Generate final response with GPT\n",
" print(\"Generating the final response using GPT...\")\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": refined_query},\n",
" ]\n",
" stream = openai.chat.completions.create(\n",
" model=MODEL_GPT,\n",
" messages=messages,\n",
" stream=True # Stream response for dynamic display\n",
" )\n",
"\n",
" # Step 3: Combine responses\n",
" response = \"\"\n",
" display_handle = display(Markdown(f\"### Refined Query:\\n\\n{refined_query}\\n\\n---\\n\\n### Final Response:\"), display_id=True)\n",
" for chunk in stream:\n",
" response_chunk = getattr(chunk.choices[0].delta, \"content\", \"\")\n",
" if response_chunk:\n",
" response += response_chunk\n",
" update_display(Markdown(f\"### Refined Query:\\n\\n{refined_query}\\n\\n---\\n\\n### Final Response:\\n\\n{response}\"), display_id=display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "4150e857",
"metadata": {},
"outputs": [],
"source": [
"# Example Usage\n",
"question = \"\"\"\n",
"Please explain what this code does:\n",
"yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2b8935f",
"metadata": {},
"outputs": [],
"source": [
"ask_with_ollama_and_gpt(raw_question=question)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "086a5294",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "llm_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}