diff --git a/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb b/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb new file mode 100644 index 0000000..7e80d75 --- /dev/null +++ b/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb @@ -0,0 +1,271 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5", + "metadata": {}, + "source": [ + "# End of week 1 exercise\n", + "\n", + "To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n", + "and responds with an explanation. This is a tool that you will be able to use yourself during the course!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1070317-3ed9-4659-abe3-828943230e03", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "# Important Pull request ref: https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293\n", + "\n", + "import re, requests, ollama\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a456906-915a-4bfd-bb9d-57e505c5093f", + "metadata": {}, + "outputs": [], + "source": [ + "# constants\n", + "\n", + "MODEL_GPT = 'gpt-4o-mini'\n", + "MODEL_LLAMA = 'llama3.2'" + ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "a8d7923c-5f28-4c30-8556-342d7c8497c1", + "metadata": {}, + "outputs": [], + "source": [ + "# set up environment\n", + "\n", + "headers = {\n", + "    \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + "\n", + "    def __init__(self, url, timeout=10):\n", + "        \"\"\"\n", + "        Create this Website object from the given url using the BeautifulSoup library\n", + "\n", + "        Raises requests.RequestException if the page cannot be fetched (network error, timeout or HTTP error status)\n", + "        \"\"\"\n", + "        self.url = url\n", + "        response = requests.get(url, headers=headers, timeout=timeout)\n", + "        response.raise_for_status()  # an error page is not content worth sending to the model\n", + "        soup = BeautifulSoup(response.content, 'html.parser')\n", + "        self.title = soup.title.string if soup.title else \"No title found\"\n", + "        if soup.body:\n", + "            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + "                irrelevant.decompose()\n", + "            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + "        else:\n", + "            # A document without a body element has no text to extract\n", + "            self.text = \"\"\n", + "\n", + "openai = OpenAI()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f0d0137-52b0-47a8-81a8-11a90a010798", + "metadata": {}, + "outputs": [], + "source": [ + "# here is the question; type over this to ask something new\n", + "\n", + "# question = \"\"\"\n", + "# Please explain what this code does and why:\n", + "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "# \"\"\"\n", + "\n", + "# question = \"\"\"\n", + "# Please explain what this code does and why:\n", + "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "# Popular dev site https://projecteuler.net/\n", + "# \"\"\"\n", + "\n", + "question = \"\"\"\n", + "How good at Software Development is Elijah Rwothoromo? \\\n", + "He has a Wordpress site https://rwothoromo.wordpress.com/. \\\n", + "He also has a LinkedIn profile https://www.linkedin.com/in/rwothoromoelaijah/. \\\n", + "What can we learn from him?\n", + "\"\"\"\n" + ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "e14fd3a1-0aca-4794-a0e0-57458e111fc9", + "metadata": {}, + "outputs": [], + "source": [ + "# Process URLs in the question to improve the prompt\n", + "\n", + "# system prompt to be more accurate for AI to just analyze the provided text.\n", + "system_prompt = \"You are an expert assistant. \\\n", + "Analyze the user's question and the provided text from relevant websites to synthesize a comprehensive answer in markdown format. \\\n", + "Provide a short summary, ignoring text that might be navigation-related.\"\n", + "\n", + "def find_urls(text):\n", + "    \"\"\"Return the http(s) URLs found in text, without trailing sentence punctuation\"\"\"\n", + "    # The bare pattern also captures the '.' that ends a sentence (\"https://site.com/.\"),\n", + "    # so the request would go to an address the user never wrote\n", + "    return [url.rstrip('.,;:!?') for url in re.findall(r'https?://[^\\s)]+', text)]\n", + "\n", + "def build_messages(question):\n", + "    \"\"\"\n", + "    Build the chat messages for the question, adding the text of every page it links to\n", + "\n", + "    A page that cannot be scraped is reported to the model as unavailable instead of aborting\n", + "    \"\"\"\n", + "    urls = find_urls(question)\n", + "    if not urls:\n", + "        updated_question = question\n", + "    else:\n", + "        # Fetch the content for each URL using the Website class\n", + "        scraped_content = []\n", + "        for url in urls:\n", + "            print(f\"Scraping: {url}\")\n", + "            try:\n", + "                site = Website(url)\n", + "                content = f\"Content from {url}:\\n---\\n{site.text}\\n---\\n\" # delimiter ---\n", + "                scraped_content.append(content)\n", + "            except Exception as e:\n", + "                print(f\"Could not scrape {url}: {e}\")\n", + "                scraped_content.append(f\"Could not retrieve content from {url}.\\n\")\n", + "\n", + "        # Combine all the scraped text into one string\n", + "        all_scraped_text = \"\\n\".join(scraped_content)\n", + "\n", + "        # Update the question with the scraped content\n", + "        updated_question = (\n", + "            \"Based on the following information, please answer the user's original question.\\n\\n\"\n", + "            \"--- TEXT FROM WEBSITES ---\\n\"\n", + "            f\"{all_scraped_text}\\n\"\n", + "            \"--- END TEXT FROM WEBSITES ---\\n\\n\"\n", + "            \"--- ORIGINAL QUESTION ---\\n\"\n", + "            f\"{question}\\n\"\n", + "        )\n", + "\n", + "    return [\n", + "        {\"role\": \"system\", \"content\": system_prompt},\n", + "        {\"role\": \"user\", \"content\": updated_question},\n", + "    ]\n", + "\n", + "# Create the messages list with the newly updated prompt\n", + "messages = build_messages(question)\n" + ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "60ce7000-a4a5-4cce-a261-e75ef45063b4", + "metadata": {}, + "outputs": [], + "source": [ + "# Get gpt-4o-mini to answer, with streaming\n", + "\n", + "def strip_markdown_wrapper(text):\n", + "    \"\"\"Remove a ```markdown fence wrapped around the whole reply, leaving its contents untouched\"\"\"\n", + "    unwrapped = re.sub(r'\\A\\s*```markdown[ \\t]*\\n', '', text)\n", + "    if unwrapped != text:\n", + "        unwrapped = re.sub(r'\\n```\\s*\\Z', '', unwrapped)\n", + "    return unwrapped\n", + "\n", + "def get_gpt_response(question, messages=None):\n", + "    \"\"\"\n", + "    Stream the gpt-4o-mini answer to the question into the notebook and return the full text\n", + "\n", + "    Pass messages to reuse a prompt that is already built; otherwise it is built from the question\n", + "    \"\"\"\n", + "    if messages is None:\n", + "        messages = build_messages(question)\n", + "    stream = openai.chat.completions.create(\n", + "        model=MODEL_GPT,\n", + "        messages=messages,\n", + "        stream=True\n", + "    )\n", + "\n", + "    response = \"\"\n", + "    display_handle = display(Markdown(\"\"), display_id=True)\n", + "    for chunk in stream:\n", + "        response += chunk.choices[0].delta.content or ''\n", + "        # Only the wrapper fence is dropped: deleting every ``` and every \"markdown\"\n", + "        # would break code samples and ordinary prose in the answer\n", + "        update_display(Markdown(strip_markdown_wrapper(response)), display_id=display_handle.display_id)\n", + "    return response\n", + "\n", + "gpt_answer = get_gpt_response(question, messages)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538", + "metadata": {}, + "outputs": [], + "source": [ + "# Get Llama 3.2 to answer\n", + "\n", + "def get_llama_response(question, messages=None):\n", + "    \"\"\"\n", + "    Return the Llama 3.2 answer to the question\n", + "\n", + "    Pass messages to reuse a prompt that is already built; otherwise it is built from the question\n", + "    \"\"\"\n", + "    if messages is None:\n", + "        messages = build_messages(question)\n", + "    response = ollama.chat(\n", + "        model=MODEL_LLAMA,\n", + "        messages=messages,\n", + "        stream=False # just get the results, don't stream them\n", + "    )\n", + "    return response['message']['content']\n", + "\n", + "display(Markdown(get_llama_response(question, messages)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa1e9987-7b6d-49c1-9a81-b1a92aceea72", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", +
"name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week1/day1.ipynb b/week1/day1.ipynb index 414c638..f492110 100644 --- a/week1/day1.ipynb +++ b/week1/day1.ipynb @@ -497,26 +497,6 @@ "display_summary(\"https://anthropic.com\")" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "5a904323-acd9-4c8e-9a17-70df76184590", - "metadata": {}, - "outputs": [], - "source": [ - "display_summary(\"https://rwothoromo.wordpress.com/\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a66c9fe8-c26a-49dd-9bc4-9efffc638f95", - "metadata": {}, - "outputs": [], - "source": [ - "display_summary(\"https://openai.com\")" - ] - }, { "cell_type": "markdown", "id": "c951be1a-7f1b-448f-af1f-845978e47e2c", @@ -558,55 +538,23 @@ "source": [ "# Step 1: Create your prompts\n", "\n", - "system_prompt = \"You are a professional assistant\"\n", + "system_prompt = \"something here\"\n", "user_prompt = \"\"\"\n", - "Review this conversation and provide a comprehensive summary. Also, suggest how much better the converation could have gone:\n", - "\n", - "Dear Dev Contact,\n", - "\n", - "I hope this message finds you well.\n", - "I would like to share that I have proficiency in front-end design tools, particularly Figma, react and Angular. 
At this stage, I am keenly interested in finding opportunities to apply these skills professionally.\n", - "\n", - "If you are aware of any companies, projects, or platforms seeking enterprise in front-end design, I would be grateful for any advice or recommendations you might kindly provide.\n", - "\n", - "Thank you very much for your time and consideration.\n", - "\n", - "Hello Job Seeker,\n", - "\n", - "I hope you are doing well.\n", - "\n", - "Dev Contact: The last role (3 months gig) I saw was looking for a junior PHP Developer. Does your CV include that?\n", - "\n", - "Hello Dev Contact \n", - "Thank you for your feedback.\n", - "Yes my CV has PHP as one of my skill set. Can I share it with you?\n", - "\n", - "Dev Contact: They said \"It's late. Interviews were on Monday\"\n", - "\n", - "Hello Dev Contact\n", - "\n", - "Thanks for the update. When you hear of any opportunity please let me know.\n", - "\n", - "Dev Contact: For now, check out https://refactory.academy/courses/refactory-apprenticeship/\n", + " Lots of text\n", + " Can be pasted here\n", "\"\"\"\n", "\n", "# Step 2: Make the messages list\n", "\n", - "messages = [\n", - " {\"role\": \"system\", \"content\": system_prompt},\n", - " {\"role\": \"user\", \"content\": user_prompt},\n", - "] # fill this in\n", + "messages = [] # fill this in\n", "\n", "# Step 3: Call OpenAI\n", "\n", - "response = openai.chat.completions.create(\n", - " model = \"gpt-4o-mini\",\n", - " messages = messages\n", - ")\n", + "response =\n", "\n", "# Step 4: print the result\n", "\n", - "print(response.choices[0].message.content)" + "print(" ] }, { @@ -640,34 +588,6 @@ "id": "f4484fcf-8b39-4c3f-9674-37970ed71988", "metadata": {}, "outputs": [], - "source": [ - "# To perform summaries using a model running locally\n", - "import ollama\n", - "\n", - "# OLLAMA_API = \"http://localhost:11434/api/chat\"\n", - "# HEADERS = {\"Content-Type\": \"application/json\"}\n", - "MODEL = \"llama3.2\"\n", - "\n", - "\n", - "def 
summarize_with_local_model(url):\n", - " website = Website(url)\n", - " messages = messages_for(website)\n", - " response = ollama.chat(\n", - " model=MODEL,\n", - " messages=messages,\n", - " stream=False # just get the results, don't stream them\n", - " )\n", - " return response['message']['content']\n", - "\n", - "display(Markdown(summarize_with_local_model(\"https://rwothoromo.wordpress.com/\")))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e76cbf31-2a82-40b8-b2e7-e2ceae7483ed", - "metadata": {}, - "outputs": [], "source": [] } ], @@ -687,7 +607,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/week1/day5.ipynb b/week1/day5.ipynb index 39142ef..5249ce8 100644 --- a/week1/day5.ipynb +++ b/week1/day5.ipynb @@ -144,15 +144,6 @@ " {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n", " ]\n", "}\n", - "\"\"\"\n", - "link_system_prompt += \"And this example:\"\n", - "link_system_prompt += \"\"\"\n", - "{\n", - " \"links\": [\n", - " {\"type\": \"for-you page\", \"url\": \"https://full.url/goes/here/services\"},\n", - " {\"type\": \"speak-to-a-human page\", \"url\": \"https://another.full.url/contact-us\"}\n", - " ]\n", - "}\n", "\"\"\"" ] }, @@ -222,9 +213,6 @@ "source": [ "# Anthropic has made their site harder to scrape, so I'm using HuggingFace..\n", "\n", - "# anthropic = Website(\"https://anthropic.com\")\n", - "# anthropic.links\n", - "# get_links(\"https://anthropic.com\")\n", "huggingface = Website(\"https://huggingface.co\")\n", "huggingface.links" ] @@ -284,15 +272,15 @@ "metadata": {}, "outputs": [], "source": [ - "# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", - "# and creates a short brochure about the company for prospective customers, investors and recruits. 
Respond in markdown.\\\n", - "# Include details of company culture, customers and careers/jobs if you have the information.\"\n", + "system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", + "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", + "Include details of company culture, customers and careers/jobs if you have the information.\"\n", "\n", "# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n", "\n", - "system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", - "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", - "Include details of company culture, customers and careers/jobs if you have the information.\"\n" + "# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", + "# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. 
Respond in markdown.\\\n", + "# Include details of company culture, customers and careers/jobs if you have the information.\"\n" ] }, { @@ -305,7 +293,6 @@ "def get_brochure_user_prompt(company_name, url):\n", " user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n", " user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n", - " user_prompt += f\"Keep the details brief or concise, factoring in that they would be printed on a simple hand-out flyer.\\n\"\n", " user_prompt += get_all_details(url)\n", " user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n", " return user_prompt" @@ -337,28 +324,6 @@ " ],\n", " )\n", " result = response.choices[0].message.content\n", - " # display(Markdown(result))\n", - " # print(result)\n", - " return result" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0029e063-0c07-4712-82d9-536ec3579e80", - "metadata": {}, - "outputs": [], - "source": [ - "def translate_brochure(brochure, language):\n", - " system_prompt_for_language = \"You're an expert in \" + language + \". 
Translate the brochure!\"\n", - " response = openai.chat.completions.create(\n", - " model=MODEL,\n", - " messages=[\n", - " {\"role\": \"system\", \"content\": system_prompt_for_language},\n", - " {\"role\": \"user\", \"content\": brochure}\n", - " ],\n", - " )\n", - " result = response.choices[0].message.content\n", " display(Markdown(result))" ] }, @@ -372,28 +337,6 @@ "create_brochure(\"HuggingFace\", \"https://huggingface.co\")" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "f8371bf5-c4c0-4e52-9a2a-066d994b0510", - "metadata": {}, - "outputs": [], - "source": [ - "brochure = create_brochure(\"Paint and Sip Uganda\", \"https://paintandsipuganda.com/\")\n", - "# translate_brochure(brochure, \"Spanish\")\n", - "translate_brochure(brochure, \"Swahili\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "34e03db6-61d0-4fc5-bf66-4f679b9befde", - "metadata": {}, - "outputs": [], - "source": [ - "create_brochure(\"Wabeh\", \"https://wabeh.com/\")" - ] - }, { "cell_type": "markdown", "id": "61eaaab7-0b47-4b29-82d4-75d474ad8d18", @@ -558,7 +501,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week1/week1 EXERCISE.ipynb b/week1/week1 EXERCISE.ipynb index 7e80d75..f3486fe 100644 --- a/week1/week1 EXERCISE.ipynb +++ b/week1/week1 EXERCISE.ipynb @@ -18,13 +18,7 @@ "metadata": {}, "outputs": [], "source": [ - "# imports\n", - "# Important Pull request ref: https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293\n", - "\n", - "import re, requests, ollama\n", - "from bs4 import BeautifulSoup\n", - "from IPython.display import Markdown, display, update_display\n", - "from openai import OpenAI" + "# imports" ] }, { @@ -47,27 +41,7 @@ "metadata": {}, "outputs": [], "source": [ - "# set up environment\n", - "\n", - "headers = {\n", - " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 
(KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", - "}\n", - "\n", - "class Website:\n", - "\n", - " def __init__(self, url):\n", - " \"\"\"\n", - " Create this Website object from the given url using the BeautifulSoup library\n", - " \"\"\"\n", - " self.url = url\n", - " response = requests.get(url, headers=headers)\n", - " soup = BeautifulSoup(response.content, 'html.parser')\n", - " self.title = soup.title.string if soup.title else \"No title found\"\n", - " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", - " irrelevant.decompose()\n", - " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", - "\n", - "openai = OpenAI()\n" + "# set up environment" ] }, { @@ -79,81 +53,10 @@ "source": [ "# here is the question; type over this to ask something new\n", "\n", - "# question = \"\"\"\n", - "# Please explain what this code does and why:\n", - "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", - "# \"\"\"\n", - "\n", - "# question = \"\"\"\n", - "# Please explain what this code does and why:\n", - "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", - "# Popular dev site https://projecteuler.net/\n", - "# \"\"\"\n", - "\n", "question = \"\"\"\n", - "How good at Software Development is Elijah Rwothoromo? \\\n", - "He has a Wordpress site https://rwothoromo.wordpress.com/. \\\n", - "He also has a LinkedIn profile https://www.linkedin.com/in/rwothoromoelaijah/. 
\\\n", - "What can we learn from him?\n", - "\"\"\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e14fd3a1-0aca-4794-a0e0-57458e111fc9", - "metadata": {}, - "outputs": [], - "source": [ - "# Process URLs in the question to improve the prompt\n", - "\n", - "# Extract all URLs from the question string using regular expressions\n", - "urls = re.findall(r'https?://[^\\s)]+', question)\n", - "# print(urls)\n", - "\n", - "if len(urls) > 0:\n", - " \n", - " # Fetch the content for each URL using the Website class\n", - " scraped_content = []\n", - " for url in urls:\n", - " print(f\"Scraping: {url}\")\n", - " try:\n", - " site = Website(url)\n", - " content = f\"Content from {url}:\\n---\\n{site.text}\\n---\\n\" # delimiter ---\n", - " scraped_content.append(content)\n", - " except Exception as e:\n", - " print(f\"Could not scrape {url}: {e}\")\n", - " scraped_content.append(f\"Could not retrieve content from {url}.\\n\")\n", - " \n", - " # Combine all the scraped text into one string\n", - " all_scraped_text = \"\\n\".join(scraped_content)\n", - " \n", - " # Update the question with the scraped content\n", - " updated_question = f\"\"\"\n", - " Based on the following information, please answer the user's original question.\n", - " \n", - " --- TEXT FROM WEBSITES ---\n", - " {all_scraped_text}\n", - " --- END TEXT FROM WEBSITES ---\n", - " \n", - " --- ORIGINAL QUESTION ---\n", - " {question}\n", - " \"\"\"\n", - "else:\n", - " updated_question = question\n", - "\n", - "# print(updated_question)\n", - "\n", - "# system prompt to be more accurate for AI to just analyze the provided text.\n", - "system_prompt = \"You are an expert assistant. 
\\\n", - "Analyze the user's question and the provided text from relevant websites to synthesize a comprehensive answer in markdown format.\\\n", - "Provide a short summary, ignoring text that might be navigation-related.\"\n", - "\n", - "# Create the messages list with the newly updated prompt\n", - "messages = [\n", - " {\"role\": \"system\", \"content\": system_prompt},\n", - " {\"role\": \"user\", \"content\": updated_question},\n", - "]\n" + "Please explain what this code does and why:\n", + "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "\"\"\"" ] }, { @@ -163,23 +66,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Get gpt-4o-mini to answer, with streaming\n", - "\n", - "def get_gpt_response(question):\n", - " stream = openai.chat.completions.create(\n", - " model=MODEL_GPT,\n", - " messages=messages,\n", - " stream=True\n", - " )\n", - " \n", - " response = \"\"\n", - " display_handle = display(Markdown(\"\"), display_id=True)\n", - " for chunk in stream:\n", - " response += chunk.choices[0].delta.content or ''\n", - " response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n", - " update_display(Markdown(response), display_id=display_handle.display_id)\n", - "\n", - "get_gpt_response(question)" + "# Get gpt-4o-mini to answer, with streaming" ] }, { @@ -189,26 +76,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Get Llama 3.2 to answer\n", - "\n", - "def get_llama_response(question):\n", - " response = ollama.chat(\n", - " model=MODEL_LLAMA,\n", - " messages=messages,\n", - " stream=False # just get the results, don't stream them\n", - " )\n", - " return response['message']['content']\n", - "\n", - "display(Markdown(get_llama_response(question)))" + "# Get Llama 3.2 to answer" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fa1e9987-7b6d-49c1-9a81-b1a92aceea72", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -227,7 +96,7 @@ "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.11.11" } }, "nbformat": 4,