From e118ae0312bfc53116b32e47683b86bdd54c490a Mon Sep 17 00:00:00 2001 From: RalphMaa Date: Sun, 13 Jul 2025 16:11:28 -0400 Subject: [PATCH] Add Day 5 translation challenge and exercise --- .../day5_exercise.ipynb | 191 +++++++++ .../day5_translation_challenge.ipynb | 366 ++++++++++++++++++ 2 files changed, 557 insertions(+) create mode 100644 week1/community-contributions/day5_challenge_exercise/day5_exercise.ipynb create mode 100644 week1/community-contributions/day5_challenge_exercise/day5_translation_challenge.ipynb diff --git a/week1/community-contributions/day5_challenge_exercise/day5_exercise.ipynb b/week1/community-contributions/day5_challenge_exercise/day5_exercise.ipynb new file mode 100644 index 0000000..b746ed8 --- /dev/null +++ b/week1/community-contributions/day5_challenge_exercise/day5_exercise.ipynb @@ -0,0 +1,191 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "75e66023-eccf-46a9-8b70-7b21ede16ddd", + "metadata": {}, + "source": [ + "# End of week 1 exercise\n", + "\n", + "To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n", + "and responds with an explanation. This is a tool that you will be able to use yourself during the course!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72d21373-edbd-4432-a29d-db8e6c9c5808", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "from dotenv import load_dotenv\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI\n", + "import ollama" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4e4c15b-7ae8-43e9-839d-7cc49345be5a", + "metadata": {}, + "outputs": [], + "source": [ + "!ollama pull llama3.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7fb44166-1c65-42fc-9950-1960bc3cc432", + "metadata": {}, + "outputs": [], + "source": [ + "# constants\n", + "\n", + "MODEL_GPT = 'gpt-4o-mini'\n", + "MODEL_LLAMA = 'llama3.2'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58f5f1e1-5296-4631-9698-8645d4621a0c", + "metadata": {}, + "outputs": [], + "source": [ + "# set up environment\n", + "\n", + "# Get the openai key\n", + "\n", + "load_dotenv(override=True)\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "\n", + "if openai_api_key and openai_api_key.startswith('sk-proj-') and len(openai_api_key)>10:\n", + " print(\"API key looks good so far\")\n", + "else:\n", + " print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n", + "\n", + "openai = OpenAI()\n", + "# Get the ollama key using the llama model\n", + "\n", + "ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12f07b33-76b9-42fa-9962-21f2a5796126", + "metadata": {}, + "outputs": [], + "source": [ + "system_prompt = \"You are a knowledgeable technical instructor who helps students understand \\\n", + "complex concepts across a wide range of technical topics. 
Your expertise includes artificial \\\n", + "intelligence, machine learning, large language models (LLMs), and programming in languages \\\n", + "such as Python, JavaScript, Java, and more. You also provide in-depth support for \\\n", + "AI engineering questions and other advanced technical subjects.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "330abeb7-7db2-4f23-9d19-dd698058a400", + "metadata": {}, + "outputs": [], + "source": [ + "# here is the question; type over this to ask something new\n", + "\n", + "question = \"\"\"\n", + "Please explain what this code does and why:\n", + "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd11ad48-91ec-4cdf-9c57-99a0451e7a2f", + "metadata": {}, + "outputs": [], + "source": [ + "# Get gpt-4o-mini to answer, with streaming\n", + "stream_GPT = openai.chat.completions.create(\n", + " model=MODEL_GPT,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": question}\n", + " ],\n", + " stream = True\n", + " )\n", + "response_GPT = \"\"\n", + "display_handle = display(Markdown(\"\"), display_id=True)\n", + "for chunk in stream_GPT:\n", + " response_GPT += chunk.choices[0].delta.content or ''\n", + " response_GPT = response_GPT.replace(\"```\",\"\").replace(\"markdown\", \"\")\n", + " update_display(Markdown(response_GPT), display_id=display_handle.display_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd2527ae-0d75-4f15-a45f-92075e3059d6", + "metadata": {}, + "outputs": [], + "source": [ + "# Get Llama 3.2 to answer\n", + "\n", + "response_llama = ollama_via_openai.chat.completions.create(\n", + " model=MODEL_LLAMA,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": question}\n", + " ],\n", + " )\n", + "result = 
response_llama.choices[0].message.content\n", + "\n", + "display(Markdown(result))\n", + "\n", + "# import ollama\n", + "\n", + "# response = ollama.chat(model=MODEL_LLAMA, messages=[\n", + "# {\"role\": \"system\", \"content\": system_prompt},\n", + "# {\"role\": \"user\", \"content\": question}\n", + "# ])\n", + "# print(response['message']['content'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2747739-ba64-4067-902f-c1acc0dbdaca", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week1/community-contributions/day5_challenge_exercise/day5_translation_challenge.ipynb b/week1/community-contributions/day5_challenge_exercise/day5_translation_challenge.ipynb new file mode 100644 index 0000000..744150c --- /dev/null +++ b/week1/community-contributions/day5_challenge_exercise/day5_translation_challenge.ipynb @@ -0,0 +1,366 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "53b9681c-896a-4e5d-b62c-44c90612e67c", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "import json\n", + "from typing import List\n", + "from dotenv import load_dotenv\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c6f1133-5c17-4ca7-819c-f64cc48212ec", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize constants and get api_key\n", + "\n", + "load_dotenv(override=True)\n", + 
"api_key = os.getenv('OPENAI_API_KEY')\n", + "\n", + "#Check if api_key is correct\n", + "if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n", + " print(\"API key looks good so far\")\n", + "else:\n", + " print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n", + " \n", + "MODEL = 'gpt-4o-mini'\n", + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cdb0a59-b5e1-4df5-a17e-8c36c80695b4", + "metadata": {}, + "outputs": [], + "source": [ + "# A class to represent a Webpage\n", + "\n", + "# Some websites need you to use proper headers when fetching them:\n", + "headers = {\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + " \"\"\"\n", + " A utility class to represent a Website that we have scraped, now with links\n", + " \"\"\"\n", + "\n", + " def __init__(self, url):\n", + " self.url = url\n", + " response = requests.get(url, headers=headers)\n", + " self.body = response.content\n", + " soup = BeautifulSoup(self.body, 'html.parser')\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + " if soup.body:\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + " else:\n", + " self.text = \"\"\n", + " links = [link.get('href') for link in soup.find_all('a')]\n", + " self.links = [link for link in links if link]\n", + "\n", + " def get_contents(self):\n", + " return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\"" + ] + }, + { + "cell_type": "markdown", + "id": "50d4cffe-da7a-4cab-afea-d061a1a608ac", + "metadata": {}, + "source": [ + "Step 1: Find relevant links to the website in order to create the brochure (Use Multi-shot prompting)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "b43b4c64-bc6a-41ca-bdb9-aa714e4e794e", + "metadata": {}, + "outputs": [], + "source": [ + "link_system_prompt = \"You are provided with a list of links found on a webpage like ['https://edwarddonner.com/', https://www.udemy.com/course/llm-engineering-master-ai-and-large-language-models/?referralCode=35EB41EBB11DD247CF54&couponCode=KEEPLEARNING] or ['https://huggingface.co/', https://huggingface.co/models] \\\n", + "You are able to decide which of the links would be most relevant to include in a brochure about the company, \\\n", + "such as links to an About page, or a News page, or a Home page, or a Company page, or Careers/Jobs pages.\\n\"\n", + "link_system_prompt += \"You should respond in JSON as in these example:\"\n", + "link_system_prompt += \"\"\"\n", + "{\n", + " \"links\": [\n", + " {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n", + " {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n", + " ]\n", + "}\n", + "\n", + "{\n", + " \"links\": [\n", + " {\"type\": \"home page\", \"url\": \"https://full.url/goes/here/about\"},\n", + " {\"type\": \"news page\", \"url\": \"https://another.full.url/careers\"}\n", + " ]\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15d2870c-67ab-4aa2-89f5-04b608a9c810", + "metadata": {}, + "outputs": [], + "source": [ + "def get_links_user_prompt(website):\n", + " user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n", + " user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. 
\\\n", + "Do not include Terms of Service, Privacy, email links.\\n\"\n", + " user_prompt += \"Links (some might be relative links):\\n\"\n", + " user_prompt += \"\\n\".join(website.links)\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e255be42-5e71-47ca-9275-c0cf22beeb00", + "metadata": {}, + "outputs": [], + "source": [ + "def get_links(url):\n", + " website = Website(url)\n", + " response = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": link_system_prompt},\n", + " {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n", + " ],\n", + " response_format={\"type\": \"json_object\"}\n", + " )\n", + " result = response.choices[0].message.content\n", + " return json.loads(result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "818b6e50-c403-42a1-8ee4-7606eaf0006f", + "metadata": {}, + "outputs": [], + "source": [ + "get_links('https://huggingface.co/')" + ] + }, + { + "cell_type": "markdown", + "id": "030ceb9b-ef71-41fd-9f23-92cb6e1d137e", + "metadata": {}, + "source": [ + "Step 2: Generate the brochure using the relevant links we got from OpenAI's selection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a703230e-d57b-43a5-bdd0-e25fc2ec2e3b", + "metadata": {}, + "outputs": [], + "source": [ + "def get_all_details(url):\n", + " result = \"Landing page:\\n\"\n", + " result += Website(url).get_contents()\n", + " links = get_links(url)\n", + " print(\"Found links:\", links)\n", + " for link in links[\"links\"]:\n", + " result += f\"\\n\\n{link['type']}\\n\"\n", + " result += Website(link[\"url\"]).get_contents()\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74d19852-f817-4fee-a95c-35ca7a83234f", + "metadata": {}, + "outputs": [], + "source": [ + "system_prompt = \"\"\"You are an assistant that analyzes the contents of several relevant pages 
from a company website \\\n", + "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", + "Include details of company culture, customers and careers/jobs if you have the information. \\\n", + "Example 1: \\\n", + "Relevant pages: \\\n", + "- https://example.com/about \\\n", + "- https://example.com/careers \\\n", + "- https://example.com/news \\\n", + "\n", + "Brochure: \\\n", + "# About ExampleCorp \\\n", + "ExampleCorp is a global leader in AI-driven logistics optimization. Founded in 2015, the company serves clients in over 30 countries... \\\n", + "\n", + "--- \\\n", + "\n", + "Example 2: \\\n", + "Relevant pages: \\\n", + "- https://techstart.io/home \\\n", + "- https://techstart.io/jobs \\\n", + "- https://techstart.io/customers \\\n", + "\n", + "Brochure: \\\n", + "# Welcome to TechStart \\\n", + "TechStart builds tools that power the future of software development. With a team-first culture and customers like Stripe, Atlassian... \\\n", + "\n", + "--- \\\n", + "\n", + "\"\"\"\n", + "\n", + "# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n", + "\n", + "# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", + "# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. 
Respond in markdown.\\\n", + "# Include details of company culture, customers and careers/jobs if you have the information.\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2f19085-0d03-4386-b390-a38014ca6590", + "metadata": {}, + "outputs": [], + "source": [ + "def get_brochure_user_prompt(company_name, url):\n", + " user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n", + " user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n", + " user_prompt += get_all_details(url)\n", + " user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ddbdea7-cf80-48d4-8bce-a11bd1a32d47", + "metadata": {}, + "outputs": [], + "source": [ + "def create_brochure(company_name, url):\n", + " response = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n", + " ],\n", + " )\n", + " result = response.choices[0].message.content\n", + " # display(Markdown(result))\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "023c1ba0-7f5a-48ac-9a98-dd184432a758", + "metadata": {}, + "outputs": [], + "source": [ + "create_brochure(\"HuggingFace\", \"https://huggingface.co\")" + ] + }, + { + "cell_type": "markdown", + "id": "187651f6-d42d-405a-abed-732486161359", + "metadata": {}, + "source": [ + "Step 3: Translate to French" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7734915d-d38f-40ad-8335-0df39c91f6d8", + "metadata": {}, + "outputs": [], + "source": [ + "system_prompt = \"\"\"You are a translator that translates the English language to the French language \\\n", + "professionally. 
All you do is first show the original version in English and then show the translated version below it in French. \\\n", + "Respond in Markdown\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29a1b40c-9040-4a3d-808b-0ca906d5cfc8", + "metadata": {}, + "outputs": [], + "source": [ + "def get_user_translation_prompt(company_name, url):\n", + " user_prompt=\"You are to translate the following brochure from the English to the French \\\n", + " language and display it with the English language brochure version first and then\\\n", + " the French language brochure version, don't make any changes to it, just a translation, the \\\n", + " following is the brochure:\"\n", + " user_prompt+=create_brochure(company_name, url)\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6e45b1f-3fa6-4db8-9f73-8339265502a7", + "metadata": {}, + "outputs": [], + "source": [ + "def translate_brochure(company_name, url):\n", + " response = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": get_user_translation_prompt(company_name, url)}\n", + " ],\n", + " )\n", + " result = response.choices[0].message.content\n", + " display(Markdown(result))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f71c2496-76ea-4f25-9939-98ebd37cb6a6", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "translate_brochure(\"HuggingFace\", \"https://huggingface.co\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + 
"nbformat": 4, + "nbformat_minor": 5 +}