Merge pull request #520 from RalphMaa/day5-community-contributions-branch

Add Day 5 translation challenge and exercise
2025-07-18 22:58:42 -04:00
parent 5441f2b38b e118ae0312
commit ae6152f84f
2 changed files with 557 additions and 0 deletions
--- a/week1/community-contributions/day5_challenge_exercise/day5_exercise.ipynb
+++ b/week1/community-contributions/day5_challenge_exercise/day5_exercise.ipynb
@@ -0,0 +1,191 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "75e66023-eccf-46a9-8b70-7b21ede16ddd",
+   "metadata": {},
+   "source": [
+    "# End of week 1 exercise\n",
+    "\n",
+    "To demonstrate your familiarity with the OpenAI API, and also Ollama, build a tool that takes a technical question,  \n",
+    "and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "72d21373-edbd-4432-a29d-db8e6c9c5808",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# imports\n",
+    "\n",
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "from IPython.display import Markdown, display, update_display\n",
+    "from openai import OpenAI\n",
+    "import ollama"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4e4c15b-7ae8-43e9-839d-7cc49345be5a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!ollama pull llama3.2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7fb44166-1c65-42fc-9950-1960bc3cc432",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Model identifiers used by the two answer cells below\n",
+    "\n",
+    "MODEL_GPT = \"gpt-4o-mini\"  # requested through the `openai` client\n",
+    "MODEL_LLAMA = \"llama3.2\"   # requested through the `ollama_via_openai` client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "58f5f1e1-5296-4631-9698-8645d4621a0c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# set up environment\n",
+    "\n",
+    "# Read OPENAI_API_KEY from the .env file, overriding any value already set in the environment\n",
+    "load_dotenv(override=True)\n",
+    "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+    "\n",
+    "# Shape check only (prefix and length); it does not confirm that the key actually works\n",
+    "if not openai_api_key or not openai_api_key.startswith('sk-proj-') or len(openai_api_key) <= 10:\n",
+    "    print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
+    "else:\n",
+    "    print(\"API key looks good so far\")\n",
+    "\n",
+    "# Client for the OpenAI API\n",
+    "openai = OpenAI()\n",
+    "\n",
+    "# Second client pointed at the OpenAI-compatible endpoint on localhost:11434;\n",
+    "# the api_key passed here is the literal placeholder string 'ollama'\n",
+    "ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12f07b33-76b9-42fa-9962-21f2a5796126",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# System prompt sent to both models. Built from adjacent string literals so that\n",
+    "# the spacing between the pieces is explicit.\n",
+    "system_prompt = (\n",
+    "    \"You are a knowledgeable technical instructor who helps students understand \"\n",
+    "    \"complex concepts across a wide range of technical topics. Your expertise includes artificial \"\n",
+    "    \"intelligence, machine learning, large language models (LLMs), and programming in languages \"\n",
+    "    \"such as Python, JavaScript, Java, and more. You also provide in-depth support for \"\n",
+    "    \"AI engineering questions and other advanced technical subjects.\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "330abeb7-7db2-4f23-9d19-dd698058a400",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# here is the question; type over this to ask something new\n",
+    "\n",
+    "question = \"\"\"\n",
+    "Please explain what this code does and why:\n",
+    "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bd11ad48-91ec-4cdf-9c57-99a0451e7a2f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get gpt-4o-mini to answer, with streaming\n",
+    "stream_GPT = openai.chat.completions.create(\n",
+    "    model=MODEL_GPT,\n",
+    "    messages=[\n",
+    "        {\"role\": \"system\", \"content\": system_prompt},\n",
+    "        {\"role\": \"user\", \"content\": question},\n",
+    "    ],\n",
+    "    stream=True,\n",
+    ")\n",
+    "\n",
+    "# Accumulate the streamed text and re-render the same output area after every chunk.\n",
+    "# The text is rendered exactly as received: the answer explains code, so its ``` fences\n",
+    "# and any mention of the word \"markdown\" must not be stripped out.\n",
+    "response_GPT = \"\"\n",
+    "display_handle = display(Markdown(\"\"), display_id=True)\n",
+    "for chunk in stream_GPT:\n",
+    "    if not chunk.choices:  # nothing to append from a chunk without choices\n",
+    "        continue\n",
+    "    response_GPT += chunk.choices[0].delta.content or ''\n",
+    "    update_display(Markdown(response_GPT), display_id=display_handle.display_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dd2527ae-0d75-4f15-a45f-92075e3059d6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get Llama 3.2 to answer (not streamed) through the `ollama_via_openai` client\n",
+    "\n",
+    "response_llama = ollama_via_openai.chat.completions.create(\n",
+    "    model=MODEL_LLAMA,\n",
+    "    messages=[\n",
+    "        {\"role\": \"system\", \"content\": system_prompt},\n",
+    "        {\"role\": \"user\", \"content\": question},\n",
+    "    ],\n",
+    ")\n",
+    "result = response_llama.choices[0].message.content\n",
+    "\n",
+    "display(Markdown(result))\n",
+    "\n",
+    "# The native client can be used instead of the OpenAI-compatible one:\n",
+    "#   response = ollama.chat(model=MODEL_LLAMA, messages=[...same two messages...])\n",
+    "#   print(response['message']['content'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c2747739-ba64-4067-902f-c1acc0dbdaca",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/week1/community-contributions/day5_challenge_exercise/day5_translation_challenge.ipynb
+++ b/week1/community-contributions/day5_challenge_exercise/day5_translation_challenge.ipynb
@@ -0,0 +1,366 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "53b9681c-896a-4e5d-b62c-44c90612e67c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import requests\n",
+    "import json\n",
+    "from typing import List\n",
+    "from dotenv import load_dotenv\n",
+    "from bs4 import BeautifulSoup\n",
+    "from IPython.display import Markdown, display, update_display\n",
+    "from openai import OpenAI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3c6f1133-5c17-4ca7-819c-f64cc48212ec",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the API key, sanity-check it, then set the model name and create the client\n",
+    "\n",
+    "load_dotenv(override=True)\n",
+    "api_key = os.getenv('OPENAI_API_KEY')\n",
+    "\n",
+    "# Shape check only (prefix and length); it does not confirm that the key actually works\n",
+    "if not api_key or not api_key.startswith('sk-proj-') or len(api_key) <= 10:\n",
+    "    print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
+    "else:\n",
+    "    print(\"API key looks good so far\")\n",
+    "\n",
+    "MODEL = 'gpt-4o-mini'\n",
+    "openai = OpenAI()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4cdb0a59-b5e1-4df5-a17e-8c36c80695b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A class to represent a Webpage\n",
+    "\n",
+    "# Some websites need you to use proper headers when fetching them:\n",
+    "headers = {\n",
+    " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+    "}\n",
+    "\n",
+    "class Website:\n",
+    "    \"\"\"\n",
+    "    A scraped web page: its title, the text of its body and the links it contains.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, url, timeout=30):\n",
+    "        \"\"\"\n",
+    "        Fetch `url` and parse it with BeautifulSoup.\n",
+    "\n",
+    "        timeout: seconds passed to requests.get, so an unresponsive site raises\n",
+    "                 an error instead of blocking the notebook indefinitely.\n",
+    "        \"\"\"\n",
+    "        self.url = url\n",
+    "        response = requests.get(url, headers=headers, timeout=timeout)\n",
+    "        self.body = response.content\n",
+    "        soup = BeautifulSoup(self.body, 'html.parser')\n",
+    "        # soup.title.string is None when <title> is empty or wraps other tags,\n",
+    "        # so fall back to the placeholder in that case as well\n",
+    "        page_title = soup.title.string if soup.title else None\n",
+    "        self.title = page_title or \"No title found\"\n",
+    "        if soup.body:\n",
+    "            # Remove these elements before extracting the text of the body\n",
+    "            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+    "                irrelevant.decompose()\n",
+    "            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
+    "        else:\n",
+    "            self.text = \"\"\n",
+    "        # Keep only the anchors that actually carry an href\n",
+    "        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))\n",
+    "        self.links = [href for href in hrefs if href]\n",
+    "\n",
+    "    def get_contents(self):\n",
+    "        \"\"\"Return the title and text as one labelled block of text.\"\"\"\n",
+    "        return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "50d4cffe-da7a-4cab-afea-d061a1a608ac",
+   "metadata": {},
+   "source": [
+    "Step 1: Find the links on the website that are relevant for the brochure (using multi-shot prompting)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b43b4c64-bc6a-41ca-bdb9-aa714e4e794e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Multi-shot system prompt for link selection: task description followed by two example replies.\n",
+    "# The example link lists are written as well-formed lists (every URL quoted) and each example\n",
+    "# reply pairs a page type with a URL of that type.\n",
+    "link_system_prompt = (\n",
+    "    \"You are provided with a list of links found on a webpage, like \"\n",
+    "    \"['https://edwarddonner.com/', 'https://www.udemy.com/course/llm-engineering-master-ai-and-large-language-models/?referralCode=35EB41EBB11DD247CF54&couponCode=KEEPLEARNING'] \"\n",
+    "    \"or ['https://huggingface.co/', 'https://huggingface.co/models']. \"\n",
+    "    \"You are able to decide which of the links would be most relevant to include in a brochure about the company, \"\n",
+    "    \"such as links to an About page, or a News page, or a Home page, or a Company page, or Careers/Jobs pages.\\n\"\n",
+    ")\n",
+    "link_system_prompt += \"You should respond in JSON as in these examples:\"\n",
+    "link_system_prompt += \"\"\"\n",
+    "{\n",
+    "    \"links\": [\n",
+    "        {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
+    "        {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
+    "    ]\n",
+    "}\n",
+    "\n",
+    "{\n",
+    "    \"links\": [\n",
+    "        {\"type\": \"home page\", \"url\": \"https://full.url/goes/here\"},\n",
+    "        {\"type\": \"news page\", \"url\": \"https://another.full.url/news\"}\n",
+    "    ]\n",
+    "}\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15d2870c-67ab-4aa2-89f5-04b608a9c810",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_links_user_prompt(website):\n",
+    "    \"\"\"Build the user message asking which of `website.links` belong in a brochure.\"\"\"\n",
+    "    intro = f\"Here is the list of links on the website of {website.url} - \"\n",
+    "    instructions = (\n",
+    "        \"please decide which of these are relevant web links for a brochure about the company, \"\n",
+    "        \"respond with the full https URL in JSON format. \"\n",
+    "        \"Do not include Terms of Service, Privacy, email links.\\n\"\n",
+    "    )\n",
+    "    heading = \"Links (some might be relative links):\\n\"\n",
+    "    # One link per line after the heading\n",
+    "    return intro + instructions + heading + \"\\n\".join(website.links)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e255be42-5e71-47ca-9275-c0cf22beeb00",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_links(url):\n",
+    "    \"\"\"Scrape `url`, ask the model to pick the relevant links, and return its reply parsed with json.loads.\"\"\"\n",
+    "    page = Website(url)\n",
+    "    messages = [\n",
+    "        {\"role\": \"system\", \"content\": link_system_prompt},\n",
+    "        {\"role\": \"user\", \"content\": get_links_user_prompt(page)},\n",
+    "    ]\n",
+    "    completion = openai.chat.completions.create(\n",
+    "        model=MODEL,\n",
+    "        messages=messages,\n",
+    "        response_format={\"type\": \"json_object\"},  # request a JSON object as the reply\n",
+    "    )\n",
+    "    return json.loads(completion.choices[0].message.content)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "818b6e50-c403-42a1-8ee4-7606eaf0006f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "get_links('https://huggingface.co/')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "030ceb9b-ef71-41fd-9f23-92cb6e1d137e",
+   "metadata": {},
+   "source": [
+    "Step 2: Generate the brochure using the relevant links we got from OpenAI's selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a703230e-d57b-43a5-bdd0-e25fc2ec2e3b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_all_details(url):\n",
+    "    \"\"\"\n",
+    "    Collect the contents of the landing page and of every link the model selected.\n",
+    "\n",
+    "    Returns a single string: the landing page block followed by one block per\n",
+    "    selected page, each introduced by the page type the model assigned to it.\n",
+    "    Pages that cannot be fetched are reported and skipped.\n",
+    "    \"\"\"\n",
+    "    from urllib.parse import urljoin  # only needed here\n",
+    "\n",
+    "    result = \"Landing page:\\n\"\n",
+    "    result += Website(url).get_contents()\n",
+    "    links = get_links(url)\n",
+    "    print(\"Found links:\", links)\n",
+    "    # The reply may not contain a \"links\" key; treat that as no extra pages\n",
+    "    for link in links.get(\"links\", []):\n",
+    "        # The page may list relative hrefs and the model can echo them back;\n",
+    "        # urljoin resolves those against `url` and leaves absolute URLs unchanged\n",
+    "        page_url = urljoin(url, link[\"url\"])\n",
+    "        try:\n",
+    "            page = Website(page_url)\n",
+    "        except requests.RequestException as error:\n",
+    "            # One unreachable page should not abort the whole brochure\n",
+    "            print(f\"Skipping {page_url}: {error}\")\n",
+    "            continue\n",
+    "        result += f\"\\n\\n{link['type']}\\n\"\n",
+    "        result += page.get_contents()\n",
+    "    return result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "74d19852-f817-4fee-a95c-35ca7a83234f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Multi-shot system prompt for the brochure. Only the opening paragraph uses line\n",
+    "# continuations; the examples keep their real line breaks so that headings, bullet\n",
+    "# lists and separators reach the model as separate lines.\n",
+    "system_prompt = \"\"\"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
+    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. \\\n",
+    "Include details of company culture, customers and careers/jobs if you have the information.\n",
+    "\n",
+    "Example 1:\n",
+    "Relevant pages:\n",
+    "- https://example.com/about\n",
+    "- https://example.com/careers\n",
+    "- https://example.com/news\n",
+    "\n",
+    "Brochure:\n",
+    "# About ExampleCorp\n",
+    "ExampleCorp is a global leader in AI-driven logistics optimization. Founded in 2015, the company serves clients in over 30 countries...\n",
+    "\n",
+    "---\n",
+    "\n",
+    "Example 2:\n",
+    "Relevant pages:\n",
+    "- https://techstart.io/home\n",
+    "- https://techstart.io/jobs\n",
+    "- https://techstart.io/customers\n",
+    "\n",
+    "Brochure:\n",
+    "# Welcome to TechStart\n",
+    "TechStart builds tools that power the future of software development. With a team-first culture and customers like Stripe, Atlassian...\n",
+    "\n",
+    "---\n",
+    "\"\"\"\n",
+    "\n",
+    "# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n",
+    "\n",
+    "# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
+    "# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. \\\n",
+    "# Include details of company culture, customers and careers/jobs if you have the information.\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a2f19085-0d03-4386-b390-a38014ca6590",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_brochure_user_prompt(company_name, url, max_chars=5_000):\n",
+    "    \"\"\"\n",
+    "    Build the user message for the brochure request.\n",
+    "\n",
+    "    company_name: name the company is introduced under in the prompt\n",
+    "    url: landing page; the text returned by get_all_details(url) is appended\n",
+    "    max_chars: the finished prompt is cut to at most this many characters\n",
+    "    \"\"\"\n",
+    "    user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
+    "    user_prompt += \"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
+    "    user_prompt += get_all_details(url)\n",
+    "    return user_prompt[:max_chars]  # Truncate if longer than max_chars"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0ddbdea7-cf80-48d4-8bce-a11bd1a32d47",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_brochure(company_name, url):\n",
+    "    \"\"\"Request a brochure for `company_name` from the model and return the text of its reply.\"\"\"\n",
+    "    messages = [\n",
+    "        {\"role\": \"system\", \"content\": system_prompt},\n",
+    "        {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)},\n",
+    "    ]\n",
+    "    completion = openai.chat.completions.create(model=MODEL, messages=messages)\n",
+    "    # The text is returned rather than displayed so that callers can process it further\n",
+    "    return completion.choices[0].message.content"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "023c1ba0-7f5a-48ac-9a98-dd184432a758",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "create_brochure(\"HuggingFace\", \"https://huggingface.co\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "187651f6-d42d-405a-abed-732486161359",
+   "metadata": {},
+   "source": [
+    "Step 3: Translate to French"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7734915d-d38f-40ad-8335-0df39c91f6d8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "system_prompt = \"\"\"You are a translator that translates the English language to the French language \\\n",
+    "professionally. All you do, is first show the original version in english and then show the translate version below it in French.\\\n",
+    "Respond in Markdown\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "29a1b40c-9040-4a3d-808b-0ca906d5cfc8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_user_translation_prompt(company_name, url):\n",
+    "    user_prompt=\"You are to translate the following brochure from the english to the french \\\n",
+    "    language and going to display it with the English language brochure version first and then\\\n",
+    "    the French language brochure version, don't make any changes to it, just a translation, the \\\n",
+    "    following is the brochure:\"\n",
+    "    user_prompt+=create_brochure(company_name, url)\n",
+    "    return user_prompt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a6e45b1f-3fa6-4db8-9f73-8339265502a7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def translate_brochure(company_name, url):\n",
+    "    response = openai.chat.completions.create(\n",
+    "        model=MODEL,\n",
+    "        messages=[\n",
+    "            {\"role\": \"system\", \"content\": system_prompt},\n",
+    "            {\"role\": \"user\", \"content\": get_user_translation_prompt(company_name, url)}\n",
+    "          ],\n",
+    "    )\n",
+    "    result = response.choices[0].message.content\n",
+    "    display(Markdown(result))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f71c2496-76ea-4f25-9939-98ebd37cb6a6",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "translate_brochure(\"HuggingFace\", \"https://huggingface.co\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}