Files
2025-10-03 07:45:36 +02:00

260 lines
10 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-10-02T18:07:54.689902Z",
"start_time": "2025-10-02T18:07:54.330580Z"
}
},
"source": [
"import os\n",
"import json\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"from website import Website"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-02T18:07:58.182655Z",
"start_time": "2025-10-02T18:07:58.176747Z"
}
},
"cell_type": "code",
"source": [
"link_system_prompt = \"You are provided with a list of links found on a Italian restaurant webpage. \\\n",
"You are able to decide which of the links would be most relevant to include in the restaurant menu, \\\n",
"such as links to an menu pdf file, Menù page, Piatti, or Bevande.\\n\"\n",
"link_system_prompt += \"You should respond in JSON as in this example:\"\n",
"link_system_prompt += \"\"\"\n",
"{\n",
" \"links\": [\n",
" {\"type\": \"menu pdf\", \"url\": \"https://www.ristoranteapprodo.com/Documenti/MenuEstivo2024.pdf\"},\n",
" {\"type\": \"menu page\", \"url\": \"https://www.giocapizza.com/men%C3%B9\"}\n",
" ]\n",
"}\n",
"\"\"\""
],
"id": "ff5d21dc8dd6bd29",
"outputs": [],
"execution_count": 3
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-02T18:08:01.823456Z",
"start_time": "2025-10-02T18:08:01.119076Z"
}
},
"cell_type": "code",
"source": [
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if api_key and api_key.startswith('sk-proj-') and len(api_key) > 10:\n",
" print(\"API key looks good so far\")\n",
"else:\n",
" print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
"\n",
"MODEL = 'gpt-4o-mini'\n",
"openai = OpenAI()\n",
"\n",
"ed = Website(\"https://www.giocapizza.com/\")\n",
"print(ed.links)"
],
"id": "bae61e79319ead26",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"API key looks good so far\n",
"['https://www.giocapizza.com', 'tel:349-6705657', 'https://www.instagram.com/giocapizza/', 'https://www.facebook.com/giocapizza/', 'https://www.tripadvisor.it/Restaurant_Review-g2337656-d17784755-Reviews-Gioca_Pizza-Adrara_San_Martino_Province_of_Bergamo_Lombardy.html', 'https://www.youtube.com/@GiocaPizza', 'https://www.pinterest.jp/giocapizza/', 'https://www.giocapizza.com', 'https://www.giocapizza.com/incorniciate', 'https://www.giocapizza.com/menù', 'https://www.giocapizza.com/servizi', 'https://www.giocapizza.com/menù', 'https://www.giocapizza.com/incorniciate', 'https://www.giocapizza.com/incorniciate', 'https://www.giocapizza.com/incorniciate', 'mailto:giocapizza@gmail.com', 'http://www.sinapsisnc.com']\n"
]
}
],
"execution_count": 4
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-02T18:08:05.104624Z",
"start_time": "2025-10-02T18:08:05.102463Z"
}
},
"cell_type": "code",
"source": [
"def get_links_user_prompt(website):\n",
" user_prompt = f\"Here is the list of links on the italian restaurant website of {website.url} - \"\n",
" user_prompt += \"please decide which of these are relevant web links for the restaurant menu, respond with the full https URL in JSON format.\"\n",
" user_prompt += \"Links (some might be relative links):\\n\"\n",
" user_prompt += \"\\n\".join(website.links)\n",
" return user_prompt\n"
],
"id": "1b5a43ae68ed636",
"outputs": [],
"execution_count": 5
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-02T18:08:08.740268Z",
"start_time": "2025-10-02T18:08:08.734461Z"
}
},
"cell_type": "code",
"source": [
"def get_links(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
" {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n",
" ],\n",
" response_format={\"type\": \"json_object\"}\n",
" )\n",
" result = response.choices[0].message.content\n",
" return json.loads(result)\n"
],
"id": "69e91ccd319153f7",
"outputs": [],
"execution_count": 6
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-02T18:08:15.402276Z",
"start_time": "2025-10-02T18:08:15.397800Z"
}
},
"cell_type": "code",
"source": [
"def get_all_details(url):\n",
" result = \"Landing page:\\n\"\n",
" result += Website(url).get_contents()\n",
" links = get_links(url)\n",
" print(\"Found links:\", links)\n",
" for link in links[\"links\"]:\n",
" result += f\"\\n\\n{link['type']}\\n\"\n",
" result += Website(link[\"url\"]).get_contents()\n",
" return result\n"
],
"id": "e76a1deea9a05353",
"outputs": [],
"execution_count": 8
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-02T18:08:18.390851Z",
"start_time": "2025-10-02T18:08:18.387630Z"
}
},
"cell_type": "code",
"source": [
"system_prompt = \"You are an assistant that analyzes the contents of several menu pages from an italian restaurant website \\\n",
"and creates restaurant menu with dishes and prices in Euro. Respond in markdown.\"\n",
"\n",
"def get_restaurant_menu_user_prompt(company_name, url):\n",
" user_prompt = f\"You are looking at a restaurant called: {company_name}\\n\"\n",
" user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a restaurant menu in markdown.\\n\"\n",
" user_prompt += get_all_details(url)\n",
" user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
" return user_prompt\n"
],
"id": "5f60f05dab091ec7",
"outputs": [],
"execution_count": 9
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-02T18:08:20.804552Z",
"start_time": "2025-10-02T18:08:20.800766Z"
}
},
"cell_type": "code",
"source": [
"def create_restaurant_menu(company_name, url):\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_restaurant_menu_user_prompt(company_name, url)}\n",
" ],\n",
" )\n",
" result = response.choices[0].message.content\n",
" display(Markdown(result))"
],
"id": "32c64d933b194bc7",
"outputs": [],
"execution_count": 10
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-02T18:08:55.009134Z",
"start_time": "2025-10-02T18:08:32.164709Z"
}
},
"cell_type": "code",
"source": "create_restaurant_menu(\"La Cascina\", \"https://www.lacascinacredaro.it/\")",
"id": "19bbd3984732895d",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found links: {'links': [{'type': 'piatti', 'url': 'http://www.byserviziinternet.com/cascina/#piatti'}]}\n"
]
},
{
"data": {
"text/plain": [
"<IPython.core.display.Markdown object>"
],
"text/markdown": "# La Cascina Ristorante Pizzeria Menu\n\n## Antipasti (Starters)\n- **Bruschetta al Pomodoro** - €5.00 \n Grilled bread topped with fresh tomatoes, garlic, and basil.\n\n- **Crostini Toscani** - €7.00 \n Toasted bread with traditional chicken liver pâté.\n\n- **Tagliere di Salumi** - €9.00 \n Selection of cured meats served with pickles and bread.\n\n## Primi Piatti (First Courses)\n- **Gnocchetti di Patate con Erbette** - €10.00 \n Potato gnocchi with a blend of seasonal greens.\n\n- **Paccheri con Polipetti** - €12.00 \n Large tubular pasta with baby octopus in a tomato sauce.\n\n- **Risotto ai Frutti di Mare** - €15.00 \n Arborio rice cooked with fresh seafood.\n\n- **Tagliolini al Tartufo** - €14.00 \n Homemade tagliolini pasta with truffle sauce.\n\n- **Zuppa di Cipolle** - €8.00 \n Traditional onion soup topped with melted cheese.\n\n## Secondi Piatti (Main Courses)\n- **Filetto di Manzo** - €18.00 \n Grilled beef fillet served with a side of seasonal vegetables.\n\n- **Pollo alla Griglia** - €12.00 \n Grilled chicken breast served with rosemary potatoes.\n\n- **Branzino al Forno** - €17.00 \n Oven-baked sea bass served with a lemon-herb sauce.\n\n## Pizze (Pizzas)\n- **Margherita** - €8.00 \n Classic pizza with tomato sauce, mozzarella, and basil.\n\n- **Diavola** - €10.00 \n Spicy salami pizza with tomato sauce and mozzarella.\n\n- **Funghi e Prosciutto** - €11.00 \n Pizza topped with mushrooms and ham.\n\n- **Vegetariana** - €9.50 \n Mixed vegetable pizza with mozzarella.\n\n## Dessert\n- **Tiramisu** - €5.00 \n Classic coffee-flavored Italian dessert.\n\n- **Panna Cotta** - €5.50 \n Creamy dessert served with berry sauce.\n\n- **Gelato** - €4.00 \n Selection of homemade ice creams.\n\n## Bevande (Beverages)\n- **Acqua Naturale / Frizzante** - €2.50 \n Still or sparkling water.\n\n- **Birra Artigianale** - €4.00 \n Local craft beer.\n\n- **Vino della Casa** - €5.50 / glass \n House wine selection.\n\nFor reservations or inquiries, please contact us at +39 035 936383. \n**Address:** Via L. Cadorna, 9, 24060 - Credaro (BG) \n**Closed on Wednesdays**."
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
}
],
"execution_count": 11
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}