diff --git a/week1/community-contributions/day1_exercise-recipe_formatter.ipynb b/week1/community-contributions/day1_exercise-recipe_formatter.ipynb new file mode 100644 index 0000000..df936bf --- /dev/null +++ b/week1/community-contributions/day1_exercise-recipe_formatter.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "cab13efd-a1f4-4077-976e-e3912511117f", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import re\n", + "from dotenv import load_dotenv\n", + "from IPython.display import Markdown, display\n", + "from openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c226f54b-325c-49b1-9d99-207a8e306682", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: youtube_transcript_api in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (1.1.1)\n", + "Requirement already satisfied: defusedxml<0.8.0,>=0.7.1 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from youtube_transcript_api) (0.7.1)\n", + "Requirement already satisfied: requests in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from youtube_transcript_api) (2.32.4)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (3.4.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (2025.7.9)\n" + ] + } + ], + "source": [ + "!pip install youtube_transcript_api" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "717fc2a4-b6c5-4027-9e6b-05e83c38d02f", + "metadata": {}, + "outputs": [], + "source": [ + "from youtube_transcript_api import YouTubeTranscriptApi" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": 4, + "source": [ + "# Load environment variables in a file called .env\n", + "\n", + "load_dotenv(override=True)\n", + "api_key = os.getenv('OPENAI_API_KEY')" + ], + "id": "3caca469-5f39-4592-bf12-c8832c44de19" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "class YouTubeRecipeExtractor:\n", + "\n", + " def __init__(self):\n", + " self.openai = OpenAI()\n", + " self.system_prompt = self.get_system_prompt()\n", + "\n", + " def get_system_prompt(self):\n", + " return \"\"\"\n", + " You are a professional chef and nutritionist specializing in recipe writting.\n", + "\n", + " Your task is to write recipes in a very comprehensive and consistent manner.\n", + " Each recipe will contain a list of ingredients and a list of steps to follow.\n", + " The quantities of the ingredients should always be referred to an official unit (grams, litres, etc). If the original recipe uses a different unit (such as cup, teaspoons, etc.) make the transformation but keep the original instruction between parenthesis.\n", + " The steps should be described in a very synthetic and concise manner. You should avoid being verbose, but the step should be understandable and easy to follow for non-expert people.\n", + " To each recipe add a general analysis from nutrition perspective (number of calories per serving, proteins, fat, etc.).\n", + " Use Markdown to improve readability.\n", + " If the text you receive is not a recipe, return a kind message explaining the situation.\n", + " \"\"\"\n", + "\n", + " def extract_video_id(self, url):\n", + " \"\"\"Extract video ID from YouTube URL\"\"\"\n", + " pattern = r'(?:youtube\\.com/watch\\?v=|youtu\\.be/|youtube\\.com/embed/)([^&\\n?#]+)'\n", + " match = re.search(pattern, url)\n", + " return match.group(1) if match else None\n", + "\n", + " def get_transcription(self, video_id):\n", + " try:\n", + " print(f\"Fetching video transcript for video {video_id}...\")\n", + " transcript = YouTubeTranscriptApi.get_transcript(video_id)\n", + " return \" \".join([item['text'] for item in transcript])\n", + " except Exception as e:\n", + " print(f\"Error fetching transcript: {e}\")\n", + " return None\n", + "\n", + " def format_recipe(self, transcript):\n", + " try:\n", + " response = self.openai.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": self.system_prompt},\n", + " {\"role\": \"user\", \"content\": f\"Summarize the following YouTube recipe:\\n\\n{transcript}\"}\n", + " ]\n", + " )\n", + " return response.choices[0].message.content\n", + " except Exception as e:\n", + " print(f\"Error summarizing text: {e}\")\n", + " return None\n", + "\n", + " def display_recipe(self, url):\n", + " transcript = self.get_transcription(self.extract_video_id(url))\n", + " recipe = self.format_recipe(transcript)\n", + " display(Markdown(recipe))\n" + ], + "id": "29e44cb5-0928-4ac9-9681-efd6ba1e359f" + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "98ea2d01-f949-4e03-9154-fe524cf64ca4", + "metadata": {}, + "outputs": [], + "source": [ + "test_bad_url = \"https://www.youtube.com/watch?v=hzGiTUTi060\"\n", + "test_good_url = \"https://www.youtube.com/watch?v=D_2DBLAt57c\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "362e39e8-a254-4f2f-8653-5fbb7ff0e1e9", + "metadata": {}, + "outputs": [], + "source": [ + "extractor = YouTubeRecipeExtractor()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "0cc259bd-46bb-4472-b3cb-f39da54e324a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fetching video transcript...\n" + ] + }, + { + "data": { + "text/markdown": [ + "Thank you for your interest, but the text you provided is not a recipe. If you're looking for cooking instructions, ingredient lists, or nutrition analysis, please provide a specific food or dish you would like to know about, and I'd be happy to help!" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "extractor.display_recipe(test_bad_url)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "3f43e320-ca55-4db5-bc95-71fcb342cf3c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fetching video transcript for video D_2DBLAt57c...\n", + "Error fetching transcript: YouTubeTranscriptApi.fetch() missing 1 required positional argument: 'self'\n" + ] + }, + { + "data": { + "text/markdown": [ + "It seems like you haven't provided a recipe or any details to summarize. If you have a specific recipe in mind, please share it, and I'll be happy to help!" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "extractor.display_recipe(test_good_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11c5c2b3-498a-43eb-9b68-d2b920c56b10", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}