246 lines
7.7 KiB
Plaintext
246 lines
7.7 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5",
|
|
"metadata": {},
|
|
"source": [
|
|
"# End of week 1 exercise\n",
|
|
"\n",
|
"To demonstrate your familiarity with the OpenAI API, and also Ollama, build a tool that takes a technical question, \n",
|
|
"and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c1070317-3ed9-4659-abe3-828943230e03",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# imports\n",
|
|
"from openai import OpenAI\n",
|
|
"from IPython.display import display, Markdown, update_display"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# constants\n",
|
|
"# MODEL_GPT = 'gpt-4o-mini'\n",
|
|
"MODEL_LLAMA = 'llama3.2'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a8d7923c-5f28-4c30-8556-342d7c8497c1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# set up environment\n",
|
|
"\n",
|
"class LLM_MODEL:\n",
"    \"\"\"Chat helper that talks to a local Ollama server through the OpenAI client.\"\"\"\n",
"\n",
"    def __init__(self, model=None, base_url='http://localhost:11434/v1/', api_key='ollama'):\n",
"        \"\"\"\n",
"        Creates the client once so that every request reuses it.\n",
"\n",
"        Args:\n",
"            model (str | None): Model name. None means MODEL_LLAMA is looked up at call time.\n",
"            base_url (str): Base URL of the OpenAI-compatible endpoint.\n",
"            api_key (str): Key handed to the OpenAI client.\n",
"        \"\"\"\n",
"        self.model = model\n",
"        self.client = OpenAI(base_url=base_url, api_key=api_key)\n",
"\n",
"    def _chat(self, sys_prompt, usr_prompt, **options):\n",
"        \"\"\"Sends one system + user message pair and returns the raw API result.\"\"\"\n",
"        msg = [\n",
"            {'role': 'system', 'content': sys_prompt},\n",
"            {'role': 'user', 'content': usr_prompt},\n",
"        ]\n",
"        # Resolve the global lazily so re-running the constants cell takes effect\n",
"        # without having to rebuild this object.\n",
"        model_name = self.model or MODEL_LLAMA\n",
"        return self.client.chat.completions.create(model=model_name, messages=msg, **options)\n",
"\n",
"    def ask_model(self, sys_prompt, usr_prompt):\n",
"        \"\"\"\n",
"        Asks the model and waits for the complete answer.\n",
"\n",
"        Args:\n",
"            sys_prompt (str): Content of the system message.\n",
"            usr_prompt (str): Content of the user message.\n",
"\n",
"        Returns:\n",
"            str: Message content of the first choice.\n",
"        \"\"\"\n",
"        response = self._chat(sys_prompt, usr_prompt)\n",
"        return response.choices[0].message.content\n",
"\n",
"    def ask_model_stream(self, sys_prompt, usr_prompt):\n",
"        \"\"\"\n",
"        Asks the model with stream=True.\n",
"\n",
"        Args:\n",
"            sys_prompt (str): Content of the system message.\n",
"            usr_prompt (str): Content of the user message.\n",
"\n",
"        Returns:\n",
"            The streaming result of chat.completions.create; iterate it to receive chunks.\n",
"        \"\"\"\n",
"        return self._chat(sys_prompt, usr_prompt, stream=True)\n",
|
|
"\n",
|
|
"model = LLM_MODEL()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6f448d69-3cec-4915-8697-f1046ba23e4a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Task 1: Tight Speed\n",
|
|
"\n",
|
|
"sys_prompt = 'You are a helpful assistant who helps me understand technical questions.\\n'\n",
|
|
"usr_prompt = 'It takes Alex 2 hours to travel a distance of 3 kms. What is the speed of Alex?'\n",
|
|
"\n",
|
|
"resp = model.ask_model(sys_prompt, usr_prompt)\n",
|
|
"display(Markdown(resp))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3f0d0137-52b0-47a8-81a8-11a90a010798",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Task 2: Travel the world in X days?\n",
|
|
"\n",
|
|
"sys_prompt = 'You are a helpful assistant who helps me understand technical questions.\\n'\n",
|
|
"usr_prompt = 'There are many cities in our world. Can you tell me how to travel the whole world in least number of days ?'\n",
|
|
"\n",
|
|
"resp = model.ask_model(sys_prompt, usr_prompt)\n",
|
|
"display(Markdown(resp))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "60ce7000-a4a5-4cce-a261-e75ef45063b4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Task 3: Generate code for Task 4 to scrape some webpages\n",
|
|
"\n",
|
|
"sys_prompt = 'You are a coding expert who generates python code for given problem.\\n'\n",
|
|
"usr_prompt = 'Given a website URL, I want to a python function to get the contents of the webpage, and another function to parse all links in the given webpage text.'\n",
|
|
"\n",
|
|
"resp = model.ask_model(sys_prompt, usr_prompt)\n",
|
|
"print(resp)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Scrape some webpages\n",
|
|
"\n",
|
|
"import requests\n",
|
|
"from bs4 import BeautifulSoup\n",
|
|
"\n",
|
"def get_webpage_content(url, timeout=10):\n",
"    \"\"\"\n",
"    Fetches the contents of a website.\n",
"\n",
"    Args:\n",
"        url (str): URL of the webpage.\n",
"        timeout (float | tuple): Seconds to wait for the server before giving up;\n",
"            passed straight to requests.get. Defaults to 10.\n",
"\n",
"    Returns:\n",
"        str | None: HTML content of the webpage, or None if the request failed\n",
"            (the error is printed).\n",
"    \"\"\"\n",
"    try:\n",
"        # Without a timeout, requests waits indefinitely on an unresponsive server.\n",
"        response = requests.get(url, timeout=timeout)\n",
"        response.raise_for_status()  # Raise an exception for HTTP errors\n",
"        return response.text\n",
"    except requests.exceptions.RequestException as e:\n",
"        # Timeouts are RequestException subclasses, so they end up here as well.\n",
"        print(f\"Error fetching webpage: {e}\")\n",
"        return None\n",
|
|
"\n",
|
"def parse_links(html_content, base_url=\"\"):\n",
"    \"\"\"\n",
"    Parses links from a given HTML content.\n",
"\n",
"    Args:\n",
"        html_content (str): HTML content of the webpage. None or an empty\n",
"            string yields an empty list.\n",
"        base_url (str): Base URL used to resolve relative links. Defaults to \"\",\n",
"            in which case relative links are skipped.\n",
"\n",
"    Returns:\n",
"        list: Unique https URLs, in the order they first appear.\n",
"    \"\"\"\n",
"    from urllib.parse import urljoin  # local import keeps the function self-contained\n",
"\n",
"    if not html_content:\n",
"        return []\n",
"\n",
"    soup = BeautifulSoup(html_content, 'html.parser')\n",
"    links = []\n",
"    seen = set()\n",
"\n",
"    for tag in soup.find_all('a'):\n",
"        href = tag.get('href')\n",
"        if not href:\n",
"            continue\n",
"\n",
"        try:\n",
"            # urljoin leaves absolute URLs untouched and resolves relative ones\n",
"            # against base_url; with an empty base_url it returns href unchanged.\n",
"            url = urljoin(base_url, href)\n",
"        except ValueError:\n",
"            continue  # malformed href\n",
"\n",
"        # Only https links are kept. This also drops mailto:, javascript: and\n",
"        # relative paths that could not be resolved.\n",
"        if url.startswith('https:/') and url not in seen:\n",
"            seen.add(url)\n",
"            links.append(url)\n",
"\n",
"    return links\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "77286a37-7d34-44f0-bbab-abd1d33b21b3",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Task 4: Make a brochure using the web-content\n",
|
|
"\n",
|
|
"# Example usage\n",
|
"webname, url = 'Huggingface', \"http://www.huggingface.co\"\n",
"\n",
"# Fetch the landing page. get_webpage_content returns None on failure,\n",
"# in which case there is nothing to parse and the link list stays empty.\n",
"html_content = get_webpage_content(url)\n",
"links = parse_links(html_content, url) if html_content else []\n",
"\n",
"print(\"Extracted Links:\")\n",
"content = f'Link:{url} -> Content:{html_content}\\n'\n",
"for link in links:\n",
"    print(link)\n",
"    # Download the linked page itself, not the landing page again.\n",
"    page_html = get_webpage_content(link)\n",
"    if page_html is None:\n",
"        continue  # skip pages that could not be downloaded\n",
"    content += f'Link:{link} -> Content:{page_html}\\n'\n",
"\n",
"sys_prompt = 'You are a helpful assistant who helps me create a brochure for a website.\\n'\n",
"usr_prompt = (\n",
"    f'You are given the contents for a few pages for the website of {webname} following next line.\\n'\n",
"    + content\n",
"    + 'Use this information to give the brochure for this company.\\n'\n",
")\n",
"\n",
"stream = model.ask_model_stream(sys_prompt, usr_prompt)\n",
"\n",
"# Re-render the accumulated text on every chunk so the brochure appears to type itself.\n",
"response = ''\n",
"display_handle = display(Markdown(\"\"), display_id=True)\n",
"\n",
"for chunk in stream:\n",
"    response += chunk.choices[0].delta.content or ''\n",
"    update_display(Markdown(response), display_id=display_handle.display_id)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "55344cc4-e377-4c75-9b39-87a29674b9f0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|