{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cab13efd-a1f4-4077-976e-e3912511117f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "\n",
    "import os\n",
    "import re\n",
    "from dotenv import load_dotenv\n",
    "from IPython.display import Markdown, display\n",
    "from openai import OpenAI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c226f54b-325c-49b1-9d99-207a8e306682",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: youtube_transcript_api in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (1.1.1)\n",
      "Requirement already satisfied: defusedxml<0.8.0,>=0.7.1 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from youtube_transcript_api) (0.7.1)\n",
      "Requirement already satisfied: requests in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from youtube_transcript_api) (2.32.4)\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (3.4.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (3.10)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (2.5.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (2025.7.9)\n"
     ]
    }
   ],
   "source": [
    "!pip install youtube_transcript_api"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "717fc2a4-b6c5-4027-9e6b-05e83c38d02f",
   "metadata": {},
   "outputs": [],
   "source": [
    "from youtube_transcript_api import YouTubeTranscriptApi"
   ]
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": 4,
   "source": [
    "# Load environment variables in a file called .env\n",
    "\n",
    "load_dotenv(override=True)\n",
    "api_key = os.getenv('OPENAI_API_KEY')"
   ],
   "id": "3caca469-5f39-4592-bf12-c8832c44de19"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "class YouTubeRecipeExtractor:\n",
    "\n",
    "    def __init__(self):\n",
    "        self.openai = OpenAI()\n",
    "        self.system_prompt = self.get_system_prompt()\n",
    "\n",
    "    def get_system_prompt(self):\n",
    "        return \"\"\"\n",
    "        You are a professional chef and nutritionist specializing in recipe writting.\n",
    "\n",
    "        Your task is to write recipes in a very comprehensive and consistent manner.\n",
    "        Each recipe will contain a list of ingredients and a list of steps to follow.\n",
    "        The quantities of the ingredients should always be referred to an official unit (grams, litres, etc). If the original recipe uses a different unit (such as cup, teaspoons, etc.) make the transformation but keep the original instruction between parenthesis.\n",
    "        The steps should be described in a very synthetic and concise manner. You should avoid being verbose, but the step should be understandable and easy to follow for non-expert people.\n",
    "        To each recipe add a general analysis from nutrition perspective (number of calories per serving, proteins, fat, etc.).\n",
    "        Use Markdown to improve readability.\n",
    "        If the text you receive is not a recipe, return a kind message explaining the situation.\n",
    "        \"\"\"\n",
    "\n",
    "    def extract_video_id(self, url):\n",
    "        \"\"\"Extract video ID from YouTube URL\"\"\"\n",
    "        pattern = r'(?:youtube\\.com/watch\\?v=|youtu\\.be/|youtube\\.com/embed/)([^&\\n?#]+)'\n",
    "        match = re.search(pattern, url)\n",
    "        return match.group(1) if match else None\n",
    "\n",
    "    def get_transcription(self, video_id):\n",
    "        try:\n",
    "            print(f\"Fetching video transcript for video {video_id}...\")\n",
    "            transcript = YouTubeTranscriptApi.get_transcript(video_id)\n",
    "            return \" \".join([item['text'] for item in transcript])\n",
    "        except Exception as e:\n",
    "            print(f\"Error fetching transcript: {e}\")\n",
    "            return None\n",
    "\n",
    "    def format_recipe(self, transcript):\n",
    "        try:\n",
    "            response = self.openai.chat.completions.create(\n",
    "                model=\"gpt-4o-mini\",\n",
    "                messages=[\n",
    "                    {\"role\": \"system\", \"content\": self.system_prompt},\n",
    "                    {\"role\": \"user\", \"content\": f\"Summarize the following YouTube recipe:\\n\\n{transcript}\"}\n",
    "                ]\n",
    "            )\n",
    "            return response.choices[0].message.content\n",
    "        except Exception as e:\n",
    "            print(f\"Error summarizing text: {e}\")\n",
    "            return None\n",
    "\n",
    "    def display_recipe(self, url):\n",
    "        transcript = self.get_transcription(self.extract_video_id(url))\n",
    "        recipe = self.format_recipe(transcript)\n",
    "        display(Markdown(recipe))\n"
   ],
   "id": "29e44cb5-0928-4ac9-9681-efd6ba1e359f"
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "98ea2d01-f949-4e03-9154-fe524cf64ca4",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_bad_url = \"https://www.youtube.com/watch?v=hzGiTUTi060\"\n",
    "test_good_url = \"https://www.youtube.com/watch?v=D_2DBLAt57c\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "362e39e8-a254-4f2f-8653-5fbb7ff0e1e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "extractor = YouTubeRecipeExtractor()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "0cc259bd-46bb-4472-b3cb-f39da54e324a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fetching video transcript...\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "Thank you for your interest, but the text you provided is not a recipe. If you're looking for cooking instructions, ingredient lists, or nutrition analysis, please provide a specific food or dish you would like to know about, and I'd be happy to help!"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "extractor.display_recipe(test_bad_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "3f43e320-ca55-4db5-bc95-71fcb342cf3c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fetching video transcript for video D_2DBLAt57c...\n",
      "Error fetching transcript: YouTubeTranscriptApi.fetch() missing 1 required positional argument: 'self'\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "It seems like you haven't provided a recipe or any details to summarize. If you have a specific recipe in mind, please share it, and I'll be happy to help!"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "extractor.display_recipe(test_good_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11c5c2b3-498a-43eb-9b68-d2b920c56b10",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}