Converted the football game narration notebook into a Python script with clear modular sections. Improved readability by adding cell markers and organizing imports, while maintaining functionality for parsing game data and generating narrations.
246 lines
7.6 KiB
Plaintext
246 lines
7.6 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "31d3c4a4-5442-4074-b812-42d60e0a0c04",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-04-26T11:54:29.195103Z",
|
|
"start_time": "2025-04-26T11:54:29.192394Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# In this example we read the stats of a football (soccer) game and create a narration of the game as if we were running a podcast\n",
|
|
"# use this website as an example: https://understat.com/match/27683"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "cf45e9d5-4913-416c-9880-5be60a96c0e6",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-04-26T11:54:30.218768Z",
|
|
"start_time": "2025-04-26T11:54:30.215752Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import requests\n",
|
|
"from dotenv import load_dotenv\n",
|
|
"from IPython.display import Markdown, display\n",
|
|
"from bs4 import BeautifulSoup\n",
|
|
"from openai import OpenAI"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "af8fea69-60aa-430c-a16c-8757b487e07a",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-04-26T11:54:31.218616Z",
|
|
"start_time": "2025-04-26T11:54:31.214154Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"load_dotenv(override=True)\n",
|
|
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
|
"\n",
|
|
"# Check the key\n",
|
|
"\n",
|
|
"if not api_key:\n",
|
|
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
|
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
|
" print(\n",
|
|
" \"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
|
"elif api_key.strip() != api_key:\n",
|
|
" print(\n",
|
|
" \"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
|
"else:\n",
|
|
" print(\"API key found and looks good so far!\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "daee94d2-f82b-43f0-95d1-15370eda1bc7",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-04-26T11:54:32.216785Z",
|
|
"start_time": "2025-04-26T11:54:32.183600Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"openai = OpenAI()\n",
|
|
"url = \"https://understat.com/match/27683\"\n",
|
|
"\n",
|
|
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
|
|
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0712dd1d-b6bc-41c6-84ec-d965f696f7aa",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-04-26T11:54:33.025841Z",
|
|
"start_time": "2025-04-26T11:54:33.023289Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"system_prompt = (\"You are a football (soccer) analyst. You are used to reading stats of football \\\n",
|
|
" games and extract relevant information. You are asked to be a podcast host and \\\n",
|
|
" you need to create a narration of the game based on the stats you read and based \\\n",
|
|
" on the play by play moves (the one with minutes upfront). You're talking to the \\\n",
|
|
" general audience so try to use easy language and do not be too telegraphic\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "70c972a6-8af6-4ff2-a338-6d7ba90e2045",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-04-26T11:54:33.730097Z",
|
|
"start_time": "2025-04-26T11:54:33.725360Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Some websites need you to use proper headers when fetching them:\n",
|
|
"headers = {\n",
|
|
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
|
"}\n",
|
|
"\n",
|
|
"class Website:\n",
|
|
" def __init__(self, url):\n",
|
|
" \"\"\"\n",
|
|
" Create this Website object from the given url using the BeautifulSoup library\n",
|
|
" \"\"\"\n",
|
|
" self.url = url\n",
|
|
" response = requests.get(url, headers=headers)\n",
|
|
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
|
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
|
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
|
" irrelevant.decompose()\n",
|
|
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4ccc1ba81c76ffb9",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-04-26T11:54:40.042357Z",
|
|
"start_time": "2025-04-26T11:54:40.040384Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"def create_user_prompt(game):\n",
|
|
" user_prompt = f\"You are looking at {game.title} football game\"\n",
|
|
" user_prompt += \"\\nThis is the entire webpage of the game \\\n",
|
|
" Please provide a narration of the game in markdown. \\\n",
|
|
" Focus only on what happened on the game and the stats and ignore all the standings and anything else.\\n\\n\"\n",
|
|
" user_prompt += game.text\n",
|
|
" return user_prompt\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e729956758b4d7b5",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-04-26T11:54:40.699042Z",
|
|
"start_time": "2025-04-26T11:54:40.696698Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "82b71c1a-895a-48e7-a945-13e615bb0096",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-04-26T11:54:41.316244Z",
|
|
"start_time": "2025-04-26T11:54:41.314110Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Define messages with system_prompt and user_prompt\n",
|
|
"def messages_for(system_prompt_input, user_prompt_input):\n",
|
|
" return [\n",
|
|
" {\"role\": \"system\", \"content\": system_prompt_input},\n",
|
|
" {\"role\": \"user\", \"content\": user_prompt_input}\n",
|
|
" ]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "854dc42e-2bbd-493b-958f-c20484908300",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-04-26T11:54:55.239164Z",
|
|
"start_time": "2025-04-26T11:54:41.987168Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# And now: call the OpenAI API.\n",
|
|
"game = Website(url)\n",
|
|
"\n",
|
|
"response = openai.chat.completions.create(\n",
|
|
" model=\"gpt-4o-mini\",\n",
|
|
" messages=messages_for(system_prompt, create_user_prompt(game))\n",
|
|
")\n",
|
|
"\n",
|
|
"# Response is provided in Markdown and displayed accordingly\n",
|
|
"display(Markdown(response.choices[0].message.content))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "758d2cbe-0f80-4572-8724-7cba77f701dd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|