Files
LLM_Engineering_OLD/community-contributions/sach91-bootcamp/week1-exercise.ipynb
2025-10-25 01:46:22 +05:30

246 lines
7.7 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5",
"metadata": {},
"source": [
"# End of week 1 exercise\n",
"\n",
"To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n",
"and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1070317-3ed9-4659-abe3-828943230e03",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"from openai import OpenAI\n",
"from IPython.display import display, Markdown, update_display"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
"metadata": {},
"outputs": [],
"source": [
"# constants\n",
"# MODEL_GPT = 'gpt-4o-mini'\n",
"MODEL_LLAMA = 'llama3.2'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8d7923c-5f28-4c30-8556-342d7c8497c1",
"metadata": {},
"outputs": [],
"source": [
"# set up environment\n",
"\n",
"class LLM_MODEL:\n",
"\n",
" def ask_model(self, sys_prompt, usr_prompt):\n",
" model_url = 'http://localhost:11434/v1/'\n",
" client = OpenAI(base_url=model_url, api_key='ollama')\n",
" msg = [{'role':'system', 'content':sys_prompt},{'role':'user', 'content':usr_prompt}]\n",
" response = client.chat.completions.create(model=MODEL_LLAMA, messages=msg)\n",
" return response.choices[0].message.content\n",
"\n",
" def ask_model_stream(self, sys_prompt, usr_prompt):\n",
" model_url = 'http://localhost:11434/v1/'\n",
" client = OpenAI(base_url=model_url, api_key='ollama')\n",
" msg = [{'role':'system', 'content':sys_prompt},{'role':'user', 'content':usr_prompt}]\n",
" stream = client.chat.completions.create(model=MODEL_LLAMA, messages=msg, stream=True)\n",
" return stream\n",
"\n",
"model = LLM_MODEL()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f448d69-3cec-4915-8697-f1046ba23e4a",
"metadata": {},
"outputs": [],
"source": [
"# Task 1: Tight Speed\n",
"\n",
"sys_prompt = 'You are a helpful assistant who helps me understand technical questions.\\n'\n",
"usr_prompt = 'It takes Alex 2 hours to travel a distance of 3 kms. What is the speed of Alex?'\n",
"\n",
"resp = model.ask_model(sys_prompt, usr_prompt)\n",
"display(Markdown(resp))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f0d0137-52b0-47a8-81a8-11a90a010798",
"metadata": {},
"outputs": [],
"source": [
"# Task 2: Travel the world in X days?\n",
"\n",
"sys_prompt = 'You are a helpful assistant who helps me understand technical questions.\\n'\n",
"usr_prompt = 'There are many cities in our world. Can you tell me how to travel the whole world in least number of days ?'\n",
"\n",
"resp = model.ask_model(sys_prompt, usr_prompt)\n",
"display(Markdown(resp))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60ce7000-a4a5-4cce-a261-e75ef45063b4",
"metadata": {},
"outputs": [],
"source": [
"# Task 3: Generate Code for task 4 to scrap some webpages\n",
"\n",
"sys_prompt = 'You are a coding expert who generates python code for given problem.\\n'\n",
"usr_prompt = 'Given a website URL, I want to a python function to get the contents of the webpage, and another function to parse all links in the given webpage text.'\n",
"\n",
"resp = model.ask_model(sys_prompt, usr_prompt)\n",
"print(resp)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538",
"metadata": {},
"outputs": [],
"source": [
"# Scrap some webpages\n",
"\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"\n",
"def get_webpage_content(url):\n",
" \"\"\"\n",
" Fetches the contents of a website.\n",
" \n",
" Args:\n",
" url (str): URL of the webpage.\n",
" \n",
" Returns:\n",
" str: HTML content of the webpage.\n",
" \"\"\"\n",
" try:\n",
" response = requests.get(url)\n",
" response.raise_for_status() # Raise an exception for HTTP errors\n",
" return response.text\n",
" except requests.exceptions.RequestException as e:\n",
" print(f\"Error fetching webpage: {e}\")\n",
" return None\n",
"\n",
"def parse_links(html_content, base_url=\"\"):\n",
" \"\"\"\n",
" Parses links from a given HTML content.\n",
" \n",
" Args:\n",
" html_content (str): HTML content of the webpage.\n",
" base_url (str): Base URL to construct relative link URLs. Defaults to \"\".\n",
" \n",
" Returns:\n",
" list: List of extracted URLs.\n",
" \"\"\"\n",
" soup = BeautifulSoup(html_content, 'html.parser')\n",
" links = []\n",
"\n",
" for tag in soup.find_all('a'):\n",
" href = tag.get('href')\n",
"\n",
" # Handle absolute and relative URLs\n",
" if not href or href.startswith('/'):\n",
" url = \"\"\n",
" else:\n",
" if 0 and base_url:\n",
" url = f\"{base_url}{href}\"\n",
" else:\n",
" url = href\n",
"\n",
" if url.startswith('https:/'):\n",
" links.append(url)\n",
"\n",
" return links\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77286a37-7d34-44f0-bbab-abd1d33b21b3",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Task 4: Make a brochure using the web-content\n",
"\n",
"# Example usage\n",
"webname, url = 'Huggingface', \"http://www.huggingface.co\"\n",
"\n",
"html_content = get_webpage_content(url)\n",
"links = parse_links(html_content, url)\n",
"\n",
"print(\"Extracted Links:\")\n",
"content = f'Link:{url} -> Content:{html_content}\\n'\n",
"for link in links:\n",
" print(link)\n",
" html_content = get_webpage_content(url)\n",
" content += f'Link:{link} -> Content:{html_content}\\n'\n",
"\n",
"sys_prompt = 'You are a helpful assistant who helps me create a brochure for a website.\\n'\n",
"usr_prompt = f'You are given the contents for a few pages for the website of {webname} following next line.\\n' + \\\n",
" content + \\\n",
" 'Use this information to give the brochure for this company.\\n'\n",
"\n",
"stream = model.ask_model_stream(sys_prompt, usr_prompt)\n",
"\n",
"response = ''\n",
"display_handle = display(Markdown(\"\"), display_id=True)\n",
"\n",
"for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" update_display(Markdown(response), display_id=display_handle.display_id)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55344cc4-e377-4c75-9b39-87a29674b9f0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}