LLM_Engineering_OLD/community-contributions/sach91-bootcamp/week1-exercise.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5",
   "metadata": {},
   "source": [
    "# End of week 1 exercise\n",
    "\n",
    "To demonstrate your familiarity with the OpenAI API, and also Ollama, build a tool that takes a technical question,  \n",
    "and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1070317-3ed9-4659-abe3-828943230e03",
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "from openai import OpenAI\n",
    "from IPython.display import display, Markdown, update_display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# constants\n",
    "# MODEL_GPT = 'gpt-4o-mini'  # kept for reference only; nothing in this notebook reads it\n",
    "# Model name passed as `model=` to client.chat.completions.create() by LLM_MODEL.\n",
    "MODEL_LLAMA = 'llama3.2'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8d7923c-5f28-4c30-8556-342d7c8497c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# set up environment\n",
    "\n",
    "class LLM_MODEL:\n",
    "    \"\"\"Thin wrapper around an OpenAI-compatible chat completions endpoint.\"\"\"\n",
    "\n",
    "    def __init__(self, model_name=None, base_url='http://localhost:11434/v1/', api_key='ollama'):\n",
    "        \"\"\"\n",
    "        Create one API client that both ask methods reuse.\n",
    "\n",
    "        Args:\n",
    "            model_name (str | None): Model sent with each request. None means\n",
    "                the notebook-level MODEL_LLAMA is looked up when a request is made.\n",
    "            base_url (str): Root URL of the OpenAI-compatible API.\n",
    "            api_key (str): Key handed to the OpenAI client.\n",
    "        \"\"\"\n",
    "        self.model_name = model_name\n",
    "        self.client = OpenAI(base_url=base_url, api_key=api_key)\n",
    "\n",
    "    @staticmethod\n",
    "    def _build_messages(sys_prompt, usr_prompt):\n",
    "        \"\"\"Return the system + user message list sent to the chat completions API.\"\"\"\n",
    "        return [\n",
    "            {'role': 'system', 'content': sys_prompt},\n",
    "            {'role': 'user', 'content': usr_prompt},\n",
    "        ]\n",
    "\n",
    "    def ask_model(self, sys_prompt, usr_prompt):\n",
    "        \"\"\"\n",
    "        Send one system/user prompt pair and wait for the complete reply.\n",
    "\n",
    "        Args:\n",
    "            sys_prompt (str): Content of the system message.\n",
    "            usr_prompt (str): Content of the user message.\n",
    "\n",
    "        Returns:\n",
    "            The message content of the first choice in the response.\n",
    "        \"\"\"\n",
    "        response = self.client.chat.completions.create(\n",
    "            # Fall back to the global at call time so edits to MODEL_LLAMA still take effect.\n",
    "            model=self.model_name or MODEL_LLAMA,\n",
    "            messages=self._build_messages(sys_prompt, usr_prompt),\n",
    "        )\n",
    "        return response.choices[0].message.content\n",
    "\n",
    "    def ask_model_stream(self, sys_prompt, usr_prompt):\n",
    "        \"\"\"\n",
    "        Send one system/user prompt pair with stream=True.\n",
    "\n",
    "        Args:\n",
    "            sys_prompt (str): Content of the system message.\n",
    "            usr_prompt (str): Content of the user message.\n",
    "\n",
    "        Returns:\n",
    "            The stream object returned by the client; iterate over it to receive chunks.\n",
    "        \"\"\"\n",
    "        return self.client.chat.completions.create(\n",
    "            model=self.model_name or MODEL_LLAMA,\n",
    "            messages=self._build_messages(sys_prompt, usr_prompt),\n",
    "            stream=True,\n",
    "        )\n",
    "\n",
    "model = LLM_MODEL()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f448d69-3cec-4915-8697-f1046ba23e4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Task 1: Tight Speed\n",
    "# Ask a simple distance/time question and render the reply as Markdown.\n",
    "\n",
    "usr_prompt = 'It takes Alex 2 hours to travel a distance of 3 kms. What is the speed of Alex?'\n",
    "sys_prompt = 'You are a helpful assistant who helps me understand technical questions.\\n'\n",
    "\n",
    "resp = model.ask_model(sys_prompt=sys_prompt, usr_prompt=usr_prompt)\n",
    "display(Markdown(resp))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f0d0137-52b0-47a8-81a8-11a90a010798",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Task 2: Travel the world in X days?\n",
    "# Open-ended planning question; the reply is rendered as Markdown.\n",
    "\n",
    "usr_prompt = 'There are many cities in our world. Can you tell me how to travel the whole world in least number of days ?'\n",
    "sys_prompt = 'You are a helpful assistant who helps me understand technical questions.\\n'\n",
    "\n",
    "resp = model.ask_model(sys_prompt=sys_prompt, usr_prompt=usr_prompt)\n",
    "display(Markdown(resp))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60ce7000-a4a5-4cce-a261-e75ef45063b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Task 3: Generate code for task 4 to scrape some webpages\n",
    "# The reply is printed as plain text rather than rendered as Markdown.\n",
    "\n",
    "usr_prompt = 'Given a website URL, I want to a python function to get the contents of the webpage, and another function to parse all links in the given webpage text.'\n",
    "sys_prompt = 'You are a coding expert who generates python code for given problem.\\n'\n",
    "\n",
    "resp = model.ask_model(sys_prompt=sys_prompt, usr_prompt=usr_prompt)\n",
    "print(resp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scrap some webpages\n",
    "\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "\n",
    "def get_webpage_content(url, timeout=10):\n",
    "    \"\"\"\n",
    "    Fetches the contents of a website.\n",
    "\n",
    "    Args:\n",
    "        url (str): URL of the webpage.\n",
    "        timeout (float): Seconds to wait for the server before giving up. Defaults to 10.\n",
    "\n",
    "    Returns:\n",
    "        str | None: HTML content of the webpage, or None when the request fails\n",
    "        (the error is printed, not raised).\n",
    "    \"\"\"\n",
    "    try:\n",
    "        # Without a timeout, requests.get() can block indefinitely on an unresponsive host.\n",
    "        response = requests.get(url, timeout=timeout)\n",
    "        response.raise_for_status()  # Raise an exception for HTTP errors\n",
    "        return response.text\n",
    "    except requests.exceptions.RequestException as e:\n",
    "        # Timeouts are RequestException subclasses, so they are reported here too.\n",
    "        print(f\"Error fetching webpage: {e}\")\n",
    "        return None\n",
    "\n",
    "def parse_links(html_content, base_url=\"\"):\n",
    "    \"\"\"\n",
    "    Parses links from a given HTML content.\n",
    "    \n",
    "    Args:\n",
    "        html_content (str): HTML content of the webpage.\n",
    "        base_url (str): Base URL to construct relative link URLs. Defaults to \"\".\n",
    "    \n",
    "    Returns:\n",
    "        list: List of extracted URLs.\n",
    "    \"\"\"\n",
    "    soup = BeautifulSoup(html_content, 'html.parser')\n",
    "    links = []\n",
    "\n",
    "    for tag in soup.find_all('a'):\n",
    "        href = tag.get('href')\n",
    "\n",
    "        # Handle absolute and relative URLs\n",
    "        if not href or href.startswith('/'):\n",
    "            url = \"\"\n",
    "        else:\n",
    "            if 0 and base_url:\n",
    "                url = f\"{base_url}{href}\"\n",
    "            else:\n",
    "                url = href\n",
    "\n",
    "        if url.startswith('https:/'):\n",
    "            links.append(url)\n",
    "\n",
    "    return links\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77286a37-7d34-44f0-bbab-abd1d33b21b3",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Task 4: Make a brochure using the web-content\n",
    "\n",
    "# Example usage\n",
    "webname, url = 'Huggingface', \"http://www.huggingface.co\"\n",
    "\n",
    "html_content = get_webpage_content(url)\n",
    "if html_content is None:\n",
    "    # get_webpage_content() already printed the cause; without the landing page there is nothing to summarise.\n",
    "    raise RuntimeError(f'Could not fetch {url}')\n",
    "links = parse_links(html_content, url)\n",
    "\n",
    "print(\"Extracted Links:\")\n",
    "content = f'Link:{url} -> Content:{html_content}\\n'\n",
    "for link in links:\n",
    "    print(link)\n",
    "    # Fetch the linked page itself, not the landing page again.\n",
    "    link_html = get_webpage_content(link)\n",
    "    if link_html is None:\n",
    "        continue  # skip pages that failed to download instead of sending 'None' to the model\n",
    "    content += f'Link:{link} -> Content:{link_html}\\n'\n",
    "\n",
    "sys_prompt = 'You are a helpful assistant who helps me create a brochure for a website.\\n'\n",
    "usr_prompt = (\n",
    "    f'You are given the contents for a few pages for the website of {webname} following next line.\\n'\n",
    "    + content\n",
    "    + 'Use this information to give the brochure for this company.\\n'\n",
    ")\n",
    "\n",
    "stream = model.ask_model_stream(sys_prompt, usr_prompt)\n",
    "\n",
    "# Accumulate the streamed text and re-render the same display slot after every chunk.\n",
    "response = ''\n",
    "display_handle = display(Markdown(\"\"), display_id=True)\n",
    "\n",
    "for chunk in stream:\n",
    "    response += chunk.choices[0].delta.content or ''\n",
    "    update_display(Markdown(response), display_id=display_handle.display_id)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55344cc4-e377-4c75-9b39-87a29674b9f0",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}