From db93c1f151cf57a144420af900325d478c8118ef Mon Sep 17 00:00:00 2001 From: sach91 Date: Tue, 21 Oct 2025 22:32:15 +0530 Subject: [PATCH] sach91 bootcamp week1 exercise --- .../sach91-bootcamp/week1-exercise.ipynb | 516 ++++++++++++++++++ 1 file changed, 516 insertions(+) create mode 100644 community-contributions/sach91-bootcamp/week1-exercise.ipynb diff --git a/community-contributions/sach91-bootcamp/week1-exercise.ipynb b/community-contributions/sach91-bootcamp/week1-exercise.ipynb new file mode 100644 index 0000000..deb3d4a --- /dev/null +++ b/community-contributions/sach91-bootcamp/week1-exercise.ipynb @@ -0,0 +1,516 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5", + "metadata": {}, + "source": [ + "# End of week 1 exercise\n", + "\n", + "To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n", + "and responds with an explanation. This is a tool that you will be able to use yourself during the course!" 
# imports
from openai import OpenAI
from IPython.display import display, Markdown, update_display

# constants
# MODEL_GPT = 'gpt-4o-mini'
MODEL_LLAMA = 'llama3.2'

# set up environment

class LLM_MODEL:
    """Thin wrapper around a local Ollama server reached through its
    OpenAI-compatible endpoint.

    Refactor note: the original `ask_model` and `ask_model_stream`
    duplicated the client construction and message assembly verbatim;
    both now share the private `_client` / `_messages` helpers.
    """

    # Ollama's OpenAI-compatible endpoint.  The api_key is a required
    # placeholder -- Ollama ignores its value.
    OLLAMA_URL = 'http://localhost:11434/v1/'

    def _client(self):
        """Build an OpenAI client pointed at the local Ollama server."""
        return OpenAI(base_url=self.OLLAMA_URL, api_key='ollama')

    def _messages(self, sys_prompt, usr_prompt):
        """Assemble the two-message chat payload (system + user)."""
        return [
            {'role': 'system', 'content': sys_prompt},
            {'role': 'user', 'content': usr_prompt},
        ]

    def ask_model(self, sys_prompt, usr_prompt):
        """Send one chat turn and return the complete reply text.

        Args:
            sys_prompt (str): system instruction for the model.
            usr_prompt (str): the user's question.

        Returns:
            str: the model's full reply.
        """
        response = self._client().chat.completions.create(
            model=MODEL_LLAMA,
            messages=self._messages(sys_prompt, usr_prompt),
        )
        return response.choices[0].message.content

    def ask_model_stream(self, sys_prompt, usr_prompt):
        """Send one chat turn and return the streaming response object.

        Returns:
            An iterable of chunks; each chunk carries an incremental
            `choices[0].delta.content` fragment (may be None).
        """
        return self._client().chat.completions.create(
            model=MODEL_LLAMA,
            messages=self._messages(sys_prompt, usr_prompt),
            stream=True,
        )


model = LLM_MODEL()
# Task 1: Tight Speed
#
# Ask the local model a simple speed = distance / time question and
# render its reply as Markdown.

sys_prompt = 'You are a helpful assistant who helps me understand technical questions.\n'
usr_prompt = 'It takes Alex 2 hours to travel a distance of 3 kms. What is the speed of Alex?'

display(Markdown(model.ask_model(sys_prompt, usr_prompt)))
Cross into Africa and visit major cities:\n", + "\t* Fly from Southeast Asia to Cairo, Egypt\n", + "\t* Explore North African countries like Morocco, Tunisia, and Algeria\n", + "5. From Africa, head north into Europe again:\n", + "\t* Fly back to Western European countries like London (UK), Paris (France), or Amsterdam (Netherlands)\n", + "6. Finally, enter South America from Europe:\n", + "\t* Take flights from European cities to Buenos Aires (Argentina) or Rio de Janeiro (Brazil)\n", + "\n", + "**Tips and Considerations:**\n", + "\n", + "1. **Fly through major hubs:** Using airports like Dubai, Istanbul, Cairo, Bangkok, and Singapore will simplify your journey.\n", + "2. **Choose efficient airlines:** Look for ultra-low-cost carriers, budget airlines, or hybrid models that offer competitive prices.\n", + "3. **Plan smart connections:** Research flight schedules, layovers, and travel restrictions to minimize delays.\n", + "4. **Use visa-free policies:** Make the most of visa exemptions where possible, like e-Visas for India, Mexico, and some African countries.\n", + "5. **Health insurance:** Check if your travel insurance covers medical care abroad.\n", + "\n", + "**Time Estimates:**\n", + "\n", + "* Assuming a moderate pace (some planning, but no frills), you can cover around 10-15 major cities in 2-3 months with decent connections and layovers.\n", + "* However, this pace is dependent on your personal interests, budget, and flexibility. Be prepared to adjust based on changing circumstances or unexpected delays.\n", + "\n", + "**Additional Tips:**\n", + "\n", + "1. Consider the weather, peak tourist seasons, and holidays when planning your trip.\n", + "2. Bring essential documents like passports, visas (if required), travel insurance, and health certificates.\n", + "3. 
# Task 2: Travel the world in X days?
#
# Ask the model for a shortest-time world-travel itinerary and render
# its reply as Markdown.

sys_prompt = 'You are a helpful assistant who helps me understand technical questions.\n'
usr_prompt = 'There are many cities in our world. Can you tell me how to travel the whole world in least number of days ?'

display(Markdown(model.ask_model(sys_prompt, usr_prompt)))
None\n", + "\n", + "def parse_links(html_content, base_url=\"\"):\n", + " \"\"\"\n", + " Parses links from a given HTML content.\n", + " \n", + " Args:\n", + " html_content (str): HTML content of the webpage.\n", + " base_url (str): Base URL to construct relative link URLs. Defaults to \"\".\n", + " \n", + " Returns:\n", + " list: List of extracted URLs.\n", + " \"\"\"\n", + " soup = BeautifulSoup(html_content, 'html.parser')\n", + " links = []\n", + "\n", + " for tag in soup.find_all('a'):\n", + " href = tag.get('href')\n", + "\n", + " # Handle absolute and relative URLs\n", + " if not href or href.startswith('/'):\n", + " url = \"\"\n", + " else:\n", + " if base_url:\n", + " url = f\"{base_url}{href}\"\n", + " else:\n", + " url = href\n", + "\n", + " links.append(url)\n", + "\n", + " return links\n", + "\n", + "# Example usage\n", + "url = \"http://www.example.com\"\n", + "html_content = get_webpage_content(url)\n", + "links = parse_links(html_content, url)\n", + "\n", + "print(\"Extracted Links:\")\n", + "for link in links:\n", + " print(link)\n", + "```\n", + "\n", + "### How It Works\n", + "\n", + "1. `get_webpage_content` function takes a URL as input and fetches the corresponding webpage using `requests.get()`. It raises exceptions for HTTP errors.\n", + "2. `parse_links` function analyzes the provided HTML content to find all `` tags, extracts their `href` attributes, and constructs URLs by appending relative paths to a base URL (if specified).\n", + "3. 
# Scrap some webpages

import requests
from bs4 import BeautifulSoup

def get_webpage_content(url):
    """
    Fetches the contents of a website.

    Args:
        url (str): URL of the webpage.

    Returns:
        str: HTML content of the webpage, or None if the request failed.
    """
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching webpage: {e}")
        return None

def parse_links(html_content, base_url=""):
    """
    Parses absolute https links from a given HTML content.

    Args:
        html_content (str): HTML content of the webpage.
        base_url (str): Retained for interface compatibility only.
            Relative links are skipped, not joined: the original join
            branch was guarded by `if 0 and base_url:` and therefore
            unreachable dead code, which is removed here.

    Returns:
        list: Absolute https URLs found in <a href=...> tags.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    links = []
    for tag in soup.find_all('a'):
        href = tag.get('href')
        # Keep only absolute https URLs; missing and relative hrefs are
        # dropped.  'https://' fixes the original 'https:/' single-slash
        # prefix typo, which also matched malformed 'https:/x' URLs.
        if href and href.startswith('https://'):
            links.append(href)
    return links
Our mission is to empower researchers, developers, and businesses to build and use high-quality AI models that can be applied in various industries.\n", + "* Image: A group photo of the Hugging Face team\n", + "\n", + "**[Page 2: Models]**\n", + "\n", + "* Headline: Explore the Largest Collection of Pre-Trained NLP Models\n", + "* Text: Our model portal offers over 200 pre-trained models, covering a wide range of tasks such as sentiment analysis, entity recognition, and language translation.\n", + "* Features:\n", + " + Model browsing by task or dataset\n", + " + Filtering by accuracy, accuracy distribution, weights, and more\n", + "\t+ Training from scratch options for advanced users\n", + "* Image: A screenshot of the model portal with a random selection of models\n", + "\n", + "**[Page 3: Datasets]**\n", + "\n", + "* Headline: Tap into a Universe of High-Quality Datasets for Model Training\n", + "* Text: Hugging Face's dataset repository includes over 1 million datasets, covering various domains such as text analysis, speech recognition, and sentiment analysis.\n", + "* Features:\n", + " + Dataset browsing by domain or type\n", + " + Filtering by size, download time, license, and more\n", + "\t+ Data augmentation options\n", + "* Image: A screenshot of the dataset repository with a random selection of datasets\n", + "\n", + "**[Page 4: Spaces]**\n", + "\n", + "* Headline: Collaborate on Research Projects and Share Models\n", + "* Text: Our shared model hosting platform allows researchers to collaborate on open-source projects, share models, and receive feedback from community members.\n", + "* Features:\n", + " + Project creation options for collaboration\n", + "\t+ Model sharing and download\n", + "\t+ Discussion forums for feedback and support\n", + "* Image: A screenshot of the spaces dashboard with a selected project\n", + "\n", + "**[Page 5: Changelog]**\n", + "\n", + "* Headline: Stay Up-to-Date on the Latest Hugging Face Features\n", + "* Text: Get 
notified about new model releases, dataset updates, and feature enhancements through our changelog.\n", + "* Format:\n", + "\t+ List of recent features and bug fixes with brief descriptions\n", + "\t+ Links to documentation or demo models for some features\n", + "\t+ Option to subscribe to notifications via email\n", + "* Image: A screenshot of the changelog as it appears on a mobile device\n", + "\n", + "**[Back Cover]**\n", + "\n", + "* Call-to-Action (CTA): Sign up for our newsletter and get started with Hugging Face today!\n", + "* Text: \"Unlock the power of AI models for everyone. Subscribe to our newsletter for news, tutorials, and special offers.\"\n", + "* Background image: The same collage as the cover page.\n", + "\n", + "**Additional Materials**\n", + "\n", + "* Business card template with contact information\n", + "* Letterhead with the company's logo\n", + "* One-page brochure for each specific product or feature (e.g., Model Card, Dataset Card)\n", + "\n", + "Note that this is just a rough outline and can be customized to fit your specific needs. The image and design elements used should be consistent throughout the brochure and online presence." 
# Task 4: Make a brochure using the web-content
#
# Fetch the landing page, follow every extracted link, and stream a
# model-written brochure assembled from all of the page contents.

# Example usage
webname, url = 'Huggingface', "http://www.huggingface.co"

html_content = get_webpage_content(url)
links = parse_links(html_content, url)

print("Extracted Links:")
# Collect "Link -> Content" sections, starting with the landing page.
# (list + join instead of quadratic `content +=` in the loop)
parts = [f'Link:{url} -> Content:{html_content}\n']
for link in links:
    print(link)
    # BUG FIX: fetch each extracted link -- the original called
    # get_webpage_content(url) here, refetching the landing page on
    # every iteration and never downloading the linked pages.
    parts.append(f'Link:{link} -> Content:{get_webpage_content(link)}\n')
content = ''.join(parts)

sys_prompt = 'You are a helpful assistant who helps me create a brochure for a website.\n'
usr_prompt = f'You are given the contents for a few pages for the website of {webname} following next line.\n' + \
             content + \
             'Use this information to give the brochure for this company.\n'

stream = model.ask_model_stream(sys_prompt, usr_prompt)

# Render the streamed reply into a single Markdown display, updated in
# place as each chunk arrives.
response = ''
display_handle = display(Markdown(""), display_id=True)

for chunk in stream:
    response += chunk.choices[0].delta.content or ''
    update_display(Markdown(response), display_id=display_handle.display_id)