added youtube transcript summarizer
This commit is contained in:
173
week1/community-contributions/day-1-youtube-video-summary.ipynb
Normal file
173
week1/community-contributions/day-1-youtube-video-summary.ipynb
Normal file
@@ -0,0 +1,173 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install youtube_transcript_api"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 79,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from youtube_transcript_api import YouTubeTranscriptApi"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 80,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables in a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 92,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"class YouTubeWebLink:\n",
|
||||
" def __init__(self, url):\n",
|
||||
" self.url = url\n",
|
||||
" self.video_id = self.get_video_id(url)\n",
|
||||
" self.set_openai_client()\n",
|
||||
" self.set_system_prompt()\n",
|
||||
"\n",
|
||||
" def get_video_id(self, url):\n",
|
||||
" \"\"\" extract youtube video id from url with regular expression \"\"\"\n",
|
||||
" regex = r\"(?:v=|be/)([a-zA-Z0-9_-]{11})\"\n",
|
||||
" match = re.search(regex, url)\n",
|
||||
" if match:\n",
|
||||
" return match.group(1)\n",
|
||||
" else:\n",
|
||||
" raise ValueError(\"Probably not a YouTube URL\")\n",
|
||||
" \n",
|
||||
" def set_openai_client(self):\n",
|
||||
" self.openai = OpenAI()\n",
|
||||
" \n",
|
||||
" def set_system_prompt(self, system_prompt=None):\n",
|
||||
" \"\"\" set system prompt from youtube video \"\"\"\n",
|
||||
" self.system_prompt = \"\"\"\n",
|
||||
" You are a skilled explainer and storyteller who specializes in summarizing YouTube video transcripts in a way that's both engaging and informative. \n",
|
||||
" Your task is to:\n",
|
||||
" - Capture key points and main ideas of the video\n",
|
||||
" - Structure your summary with in clear sections\n",
|
||||
" - Include important details, facts, and figures mentioned\n",
|
||||
" - Never end your summary with a \"Conclusion\" section\n",
|
||||
" - Keep the summary short and easy to understand\n",
|
||||
" - Always format your response in markdown for better readability\n",
|
||||
" \"\"\" if system_prompt is None else system_prompt\n",
|
||||
"\n",
|
||||
" def get_transcript(self):\n",
|
||||
" \"\"\" get transcript from youtube video \"\"\"\n",
|
||||
" try:\n",
|
||||
" print('Fetching video transcript...')\n",
|
||||
" transcript = YouTubeTranscriptApi.get_transcript(self.video_id)\n",
|
||||
" return \" \".join([item['text'] for item in transcript])\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error fetching transcript: {e}\")\n",
|
||||
" return None\n",
|
||||
" \n",
|
||||
" def get_summary_from_transcript(self, transcript):\n",
|
||||
" \"\"\" summarize text using openai \"\"\"\n",
|
||||
" try:\n",
|
||||
" print('Summarizing video...')\n",
|
||||
" response = self.openai.chat.completions.create(\n",
|
||||
" model=\"gpt-4o-mini\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": self.system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": f\"Summarize the following YouTube video transcript:\\n\\n{transcript}\"}\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error summarizing text: {e}\")\n",
|
||||
" return None\n",
|
||||
"\n",
|
||||
" def display_summary(self):\n",
|
||||
" \"\"\" summarize youtube video \"\"\"\n",
|
||||
" transcript = self.get_transcript()\n",
|
||||
" summary = self.get_summary_from_transcript(transcript)\n",
|
||||
" display(Markdown(summary))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 93,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# video link and share link of same youtube video\n",
|
||||
"test_url_1 = \"https://www.youtube.com/watch?v=nYy-umCNKPQ&list=PLWHe-9GP9SMMdl6SLaovUQF2abiLGbMjs\"\n",
|
||||
"test_url_2 = \"https://youtu.be/nYy-umCNKPQ?si=ILVrQlKT0W71G5pU\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Test that we get same id\n",
|
||||
"video1, video2 = YouTubeWebLink(test_url_1), YouTubeWebLink(test_url_2)\n",
|
||||
"video1.video_id, video2.video_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"video1.display_summary()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llms",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user