{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "cab13efd-a1f4-4077-976e-e3912511117f", "metadata": {}, "outputs": [], "source": [ "# imports\n", "\n", "import os\n", "import re\n", "from dotenv import load_dotenv\n", "from IPython.display import Markdown, display\n", "from openai import OpenAI" ] }, { "cell_type": "code", "execution_count": 2, "id": "c226f54b-325c-49b1-9d99-207a8e306682", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: youtube_transcript_api in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (1.1.1)\n", "Requirement already satisfied: defusedxml<0.8.0,>=0.7.1 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from youtube_transcript_api) (0.7.1)\n", "Requirement already satisfied: requests in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from youtube_transcript_api) (2.32.4)\n", "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (3.4.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (2.5.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (2025.7.9)\n" ] } ], "source": [ "!pip install youtube_transcript_api" ] }, { "cell_type": "code", "execution_count": 3, "id": "717fc2a4-b6c5-4027-9e6b-05e83c38d02f", "metadata": {}, "outputs": [], "source": [ "from youtube_transcript_api import YouTubeTranscriptApi" ] }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": 4, "source": [ "# Load environment variables in a file called .env\n", "\n", "load_dotenv(override=True)\n", "api_key = os.getenv('OPENAI_API_KEY')" ], "id": "3caca469-5f39-4592-bf12-c8832c44de19" }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": [ "class YouTubeRecipeExtractor:\n", "\n", " def __init__(self):\n", " self.openai = OpenAI()\n", " self.system_prompt = self.get_system_prompt()\n", "\n", " def get_system_prompt(self):\n", " return \"\"\"\n", " You are a professional chef and nutritionist specializing in recipe writting.\n", "\n", " Your task is to write recipes in a very comprehensive and consistent manner.\n", " Each recipe will contain a list of ingredients and a list of steps to follow.\n", " The quantities of the ingredients should always be referred to an official unit (grams, litres, etc). If the original recipe uses a different unit (such as cup, teaspoons, etc.) make the transformation but keep the original instruction between parenthesis.\n", " The steps should be described in a very synthetic and concise manner. You should avoid being verbose, but the step should be understandable and easy to follow for non-expert people.\n", " To each recipe add a general analysis from nutrition perspective (number of calories per serving, proteins, fat, etc.).\n", " Use Markdown to improve readability.\n", " If the text you receive is not a recipe, return a kind message explaining the situation.\n", " \"\"\"\n", "\n", " def extract_video_id(self, url):\n", " \"\"\"Extract video ID from YouTube URL\"\"\"\n", " pattern = r'(?:youtube\\.com/watch\\?v=|youtu\\.be/|youtube\\.com/embed/)([^&\\n?#]+)'\n", " match = re.search(pattern, url)\n", " return match.group(1) if match else None\n", "\n", " def get_transcription(self, video_id):\n", " try:\n", " print(f\"Fetching video transcript for video {video_id}...\")\n", " transcript = YouTubeTranscriptApi.get_transcript(video_id)\n", " return \" \".join([item['text'] for item in transcript])\n", " except Exception as e:\n", " print(f\"Error fetching transcript: {e}\")\n", " return None\n", "\n", " def format_recipe(self, transcript):\n", " try:\n", " response = self.openai.chat.completions.create(\n", " model=\"gpt-4o-mini\",\n", " messages=[\n", " {\"role\": \"system\", \"content\": self.system_prompt},\n", " {\"role\": \"user\", \"content\": f\"Summarize the following YouTube recipe:\\n\\n{transcript}\"}\n", " ]\n", " )\n", " return response.choices[0].message.content\n", " except Exception as e:\n", " print(f\"Error summarizing text: {e}\")\n", " return None\n", "\n", " def display_recipe(self, url):\n", " transcript = self.get_transcription(self.extract_video_id(url))\n", " recipe = self.format_recipe(transcript)\n", " display(Markdown(recipe))\n" ], "id": "29e44cb5-0928-4ac9-9681-efd6ba1e359f" }, { "cell_type": "code", "execution_count": 6, "id": "98ea2d01-f949-4e03-9154-fe524cf64ca4", "metadata": {}, "outputs": [], "source": [ "test_bad_url = \"https://www.youtube.com/watch?v=hzGiTUTi060\"\n", "test_good_url = \"https://www.youtube.com/watch?v=D_2DBLAt57c\"" ] }, { "cell_type": "code", "execution_count": 7, "id": "362e39e8-a254-4f2f-8653-5fbb7ff0e1e9", "metadata": {}, "outputs": [], "source": [ "extractor = YouTubeRecipeExtractor()\n" ] }, { "cell_type": "code", "execution_count": 8, "id": "0cc259bd-46bb-4472-b3cb-f39da54e324a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fetching video transcript...\n" ] }, { "data": { "text/markdown": [ "Thank you for your interest, but the text you provided is not a recipe. If you're looking for cooking instructions, ingredient lists, or nutrition analysis, please provide a specific food or dish you would like to know about, and I'd be happy to help!" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "extractor.display_recipe(test_bad_url)" ] }, { "cell_type": "code", "execution_count": 8, "id": "3f43e320-ca55-4db5-bc95-71fcb342cf3c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fetching video transcript for video D_2DBLAt57c...\n", "Error fetching transcript: YouTubeTranscriptApi.fetch() missing 1 required positional argument: 'self'\n" ] }, { "data": { "text/markdown": [ "It seems like you haven't provided a recipe or any details to summarize. If you have a specific recipe in mind, please share it, and I'll be happy to help!" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "extractor.display_recipe(test_good_url)" ] }, { "cell_type": "code", "execution_count": null, "id": "11c5c2b3-498a-43eb-9b68-d2b920c56b10", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 5 }