AI stock adviser webscraping notebook
This commit is contained in:
354
week1/community-contributions/day1- stock adviser webscrap.ipynb
Normal file
354
week1/community-contributions/day1- stock adviser webscrap.ipynb
Normal file
@@ -0,0 +1,354 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "2e40e4f0-4f65-4f68-be50-07401959f46e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"import requests\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from bs4 import BeautifulSoup\n",
|
||||||
|
"from IPython.display import Markdown, display\n",
|
||||||
|
"from openai import OpenAI"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "fea8f921-7f2f-4942-9f88-cb6eb64ea731",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"API key found and looks good so far!\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Load environment variables in a file called .env\n",
|
||||||
|
"\n",
|
||||||
|
"load_dotenv()\n",
|
||||||
|
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||||
|
"\n",
|
||||||
|
"# Check the key\n",
|
||||||
|
"\n",
|
||||||
|
"if not api_key:\n",
|
||||||
|
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||||
|
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||||
|
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||||
|
"elif api_key.strip() != api_key:\n",
|
||||||
|
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||||
|
"else:\n",
|
||||||
|
" print(\"API key found and looks good so far!\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "8d90ba3b-e50e-4a7d-820f-e669ea3679ff",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#call open AI\n",
|
||||||
|
"openai = OpenAI()\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "046a59c6-56f5-4a09-89bd-8163075ad643",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# A class to represent a Webpage\n",
|
||||||
|
"\n",
|
||||||
|
"headers = {\n",
|
||||||
|
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||||
|
"}\n",
|
||||||
|
"class Website:\n",
|
||||||
|
" def __init__(self, url):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Create this Website object for a Finance latest news\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" self.url = url\n",
|
||||||
|
" response = requests.get(url, headers=headers)\n",
|
||||||
|
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||||
|
" \n",
|
||||||
|
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||||
|
" \n",
|
||||||
|
" # Find news headlines and content \n",
|
||||||
|
" news_data = []\n",
|
||||||
|
" \n",
|
||||||
|
" # Try different selectors \n",
|
||||||
|
" news_items = soup.find_all('h3') + soup.find_all('h2')\n",
|
||||||
|
" \n",
|
||||||
|
" for item in news_items:\n",
|
||||||
|
" headline = item.get_text(strip=True)\n",
|
||||||
|
" if headline and len(headline) > 20: # Filter out short/empty text\n",
|
||||||
|
" # Try to find content near the headline\n",
|
||||||
|
" content = \"\"\n",
|
||||||
|
" parent = item.find_parent()\n",
|
||||||
|
" if parent:\n",
|
||||||
|
" # Look for paragraph or summary text\n",
|
||||||
|
" summary = parent.find('p')\n",
|
||||||
|
" if summary:\n",
|
||||||
|
" content = summary.get_text(strip=True)[:300] + \"...\"\n",
|
||||||
|
" \n",
|
||||||
|
" news_data.append({'headline': headline, 'content': content})\n",
|
||||||
|
" \n",
|
||||||
|
" # Create the text content\n",
|
||||||
|
" self.text = \"Latest financial news headlines:\\n\\n\"\n",
|
||||||
|
" \n",
|
||||||
|
" # Get top 5 headlines with content\n",
|
||||||
|
" for i, news in enumerate(news_data[:10], 1):\n",
|
||||||
|
" self.text += f\"{i}. {news['headline']}\\n\"\n",
|
||||||
|
" if news['content']:\n",
|
||||||
|
" self.text += f\" Summary: {news['content']}\\n\"\n",
|
||||||
|
" self.text += \"\\n\"\n",
|
||||||
|
" \n",
|
||||||
|
" if not news_data:\n",
|
||||||
|
" self.text = \"No headlines found. Yahoo Finance structure may have changed.\"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"id": "b5b1c72e-bc74-4ed0-9a64-795ca9bac74d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Title: Yahoo Finance - Stock Market Live, Quotes, Business & Finance News\n",
|
||||||
|
"Top News:\n",
|
||||||
|
"Latest financial news headlines:\n",
|
||||||
|
"\n",
|
||||||
|
"1. US Risks Losing ‘Reliable Investment’ Status, Allianz GI Manager Says\n",
|
||||||
|
" Summary: (Bloomberg) -- Inside one of Europe’s biggest asset managers, there’s growing concern that Republican efforts to gut legislation supporting key industries such as clean energy may result in the US losing its status as a destination for investor capital.Most Read from BloombergNY Private School Plead...\n",
|
||||||
|
"\n",
|
||||||
|
"2. Why Intempus thinks robots should have a human physiological state\n",
|
||||||
|
" Summary: Teddy Warner, 19, has always been interested in robotics. His family was in the industry, and he says he \"grew up\" working in a machinist shop while in high school. Now Warner is building a robotics company of his own, Intempus, that looks to make robots a bit more human. Intempus is building tech t...\n",
|
||||||
|
"\n",
|
||||||
|
"3. Last 24 hours: TechCrunch Disrupt 2025 Early Bird Deals will fly away after today\n",
|
||||||
|
" Summary: Just 24 hours left to lock in Early Bird pricing for TechCrunch Disrupt 2025 — happening October 27–29 at Moscone West in San Francisco. Save up to $900 on your pass, or bring someone brilliant with you for 90% off their ticket. This deal ends tonight at 11:59 p.m. PT. Grab your Early Bird discount ...\n",
|
||||||
|
"\n",
|
||||||
|
"4. 48 hours left: What you won’t want to miss at the 20th TechCrunch Disrupt in October\n",
|
||||||
|
" Summary: There are just 48 hours left to save up to $900 on your ticket to TechCrunch Disrupt 2025 — and get 90% off the second. After May 25 at 11:59 p.m. PT, Early Bird pricing vanishes — along with your best chance to join 10,000 of tech’s most forward-thinking minds for less. But forget the math for a ...\n",
|
||||||
|
"\n",
|
||||||
|
"5. More than a third of Americans say they want an 'adventurous retirement'\n",
|
||||||
|
" Summary: Retirement is no longer just about rocking chairs, gardening, grandchildren, or afternoons on the golf course....\n",
|
||||||
|
"\n",
|
||||||
|
"6. 'Unsustainable fiscal situation': Wall Street braces for more bond market turmoil as Trump tax bill stirs up deficit concerns\n",
|
||||||
|
" Summary: Surging Treasury yields signal deepening market fears as Trump's tax plan, soaring deficits, and global fiscal turmoil shake investor confidence....\n",
|
||||||
|
"\n",
|
||||||
|
"7. Nvidia has lost its shock power to investors, for now\n",
|
||||||
|
" Summary: Nvidia's quarter may be tougher than normal to assess. Here's why....\n",
|
||||||
|
"\n",
|
||||||
|
"8. Nvidia earnings, Trump tariff updates, and the Fed's preferred inflation gauge: What to know this week\n",
|
||||||
|
" Summary: A quarterly earnings release from Nvidia is set to greet investors in the week ahead as the stock market rally has hit pause....\n",
|
||||||
|
"\n",
|
||||||
|
"9. This week in Trumponomics: Bonds spoil the party\n",
|
||||||
|
" Summary: Trump is heading toward an important victory on tax cuts. Instead of cheering, markets are fretting....\n",
|
||||||
|
"\n",
|
||||||
|
"10. Manufacturers could benefit from Trump's 'big, beautiful' bill depending on what they make\n",
|
||||||
|
" Summary: Advocates for the manufacturing sector have hailed the advancement of Trump's \"big, beautiful bill,\" but at least two provisions in the 1,000-plus-page package could cut that ebullience for some factory owners....\n",
|
||||||
|
"\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"website = Website(\"https://finance.yahoo.com/topic/latest-news/\")\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Title:\", website.title)\n",
|
||||||
|
"print(\"Top News:\")\n",
|
||||||
|
"print(website.text)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 22,
|
||||||
|
"id": "2c0ac856-b0d8-4b15-8092-71ab3952a0d9",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Define our system prompt\n",
|
||||||
|
"system_prompt = \"\"\"You are a veteran stock market and finance expert with 50+ years of experience helping investors make safe, steady gains. Your audience is beginners with small amounts to invest (around $100). \n",
|
||||||
|
"\n",
|
||||||
|
"**Response Format:**\n",
|
||||||
|
"1. Start with \"The News Snapshot:\" - Write 3-4 lines summarizing the key financial developments from the provided headlines and summaries, showing you understand the current market situation, start the write up for this with today in the news we see that...\n",
|
||||||
|
"\n",
|
||||||
|
"2. Give specific stock advice based on the news:\n",
|
||||||
|
" - What to avoid and why\n",
|
||||||
|
" - 2-3 specific stock recommendations with ticker symbols\n",
|
||||||
|
" - Focus only on safe, dividend-paying stocks or clear beneficiaries from the news\n",
|
||||||
|
"\n",
|
||||||
|
"3. End with \"The big picture:\" - One sentence explaining the overall market condition\n",
|
||||||
|
"\n",
|
||||||
|
"4. Close with \"Your game plan:\" - Simple, actionable advice for their $100 to show how to split it\n",
|
||||||
|
"\n",
|
||||||
|
"**Tone & Style:**\n",
|
||||||
|
"- Talk like a knowledgeable but friendly Wall Street professional advising a beginner\n",
|
||||||
|
"- Keep it under 200 words total\n",
|
||||||
|
"- Use simple language, no complex jargon\n",
|
||||||
|
"- Be direct and practical\n",
|
||||||
|
"- Focus on capital preservation over quick gains\n",
|
||||||
|
"- Always relate advice directly to the news headlines provided\n",
|
||||||
|
"\n",
|
||||||
|
"**Key Rules:**\n",
|
||||||
|
"- Only recommend established, safe stocks\n",
|
||||||
|
"- Always explain WHY based on the news\n",
|
||||||
|
"- No speculative or meme stocks\n",
|
||||||
|
"- Emphasize learning over quick profits\"\"\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 23,
|
||||||
|
"id": "077acf13-6e37-488f-a7c7-5f301266f57f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# A function that writes a User Prompt that asks for summaries of websites:\n",
|
||||||
|
"\n",
|
||||||
|
"def user_prompt_for(website):\n",
|
||||||
|
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
|
||||||
|
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
|
||||||
|
"please provide a provide your investment advice for a beginner with $100. \\\n",
|
||||||
|
"Because it includes finance news or trend, let the advice be based on these too.\\n\\n\"\n",
|
||||||
|
" user_prompt += website.text\n",
|
||||||
|
" return user_prompt"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 24,
|
||||||
|
"id": "1c129909-769c-49f0-a84d-85a25972463b",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def messages_for(website):\n",
|
||||||
|
" return [\n",
|
||||||
|
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||||
|
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
|
||||||
|
" ]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 25,
|
||||||
|
"id": "2c9f998f-639f-451b-a67e-5a95978ab70d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def get_advice(url):\n",
|
||||||
|
" website = Website(url)\n",
|
||||||
|
" response = openai.chat.completions.create(\n",
|
||||||
|
" model = \"gpt-4o-mini\",\n",
|
||||||
|
" messages = messages_for(website)\n",
|
||||||
|
" )\n",
|
||||||
|
" return response.choices[0].message.content"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"id": "402b4bb4-fbf4-4930-9cd1-4ede22491fa2",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'**The News Snapshot:** Recent headlines reveal rising treasury yields and concerns over the US losing its \"reliable investment\" status, stoking fears of market uncertainty. Amidst this backdrop, investors may want to focus on stable, dividend-paying stocks that can weather the storm and provide consistent returns.\\n\\n**Stock Advice:**\\n- **Avoid speculative tech stocks** like Nvidia, which has recently shown volatility and uncertainty in earnings, leading to a potential loss of investor confidence.\\n- **Recommendation #1: Johnson & Johnson (JNJ)** – A well-established healthcare company that pays a reliable dividend, making it a safe bet in uncertain times.\\n- **Recommendation #2: Procter & Gamble (PG)** – Known for its strong brand portfolio and consistent dividend payouts, PG offers stability and resilience against market fluctuations.\\n- **Recommendation #3: Coca-Cola (KO)** – With a history of dividend increases, Coca-Cola remains a staple in many portfolios, providing that defensive position investors need right now.\\n\\n**The big picture:** The market is showing signs of concern, and investors should prioritize capital preservation over chasing quick returns.\\n\\n**Your game plan:** With your $100, consider investing in fractional shares of JNJ, PG, or KO to benefit from their dividends and stability while learning about long-term investing principles.'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"get_advice(\"https://finance.yahoo.com/topic/latest-news/\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 26,
|
||||||
|
"id": "0427753f-6b47-4c36-b68f-0f22abd8a7cd",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def display_fin_advice(url):\n",
|
||||||
|
" advice_content = get_advice(url) \n",
|
||||||
|
" display(Markdown(advice_content))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 27,
|
||||||
|
"id": "1d26e64f-fdd0-4492-9b20-a54847b11139",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/markdown": [
|
||||||
|
"The News Snapshot: Today in the news, we see that concerns are rising around the US potentially losing its appeal as a reliable investment destination due to political actions, particularly in clean energy. Rising Treasury yields and fiscal uncertainty, stemming from tax policies, are causing unease in the markets. Generally, investors are on alert due to potential repercussions for sectors reliant on government support and tax reform.\n",
|
||||||
|
"\n",
|
||||||
|
"Specific Stock Advice:\n",
|
||||||
|
"- I advise avoiding high-growth tech stocks like **Nvidia (NVDA)** for now, as their recent earnings show volatility and uncertainty. \n",
|
||||||
|
"- Instead, consider established dividend-paying stocks like **Johnson & Johnson (JNJ)** and **Procter & Gamble (PG)**. Both companies are less sensitive to political changes and provide steady dividends, making them safer bets during turbulent times.\n",
|
||||||
|
"- Another option is **3M Company (MMM)**, which has a strong history of dividend payments and benefits from potential manufacturing boosts tied to new legislation.\n",
|
||||||
|
"\n",
|
||||||
|
"The big picture: The market is navigating through uncertainties, particularly around fiscal policy and investment confidence.\n",
|
||||||
|
"\n",
|
||||||
|
"Your game plan: Split your $100 into three parts: $40 in Johnson & Johnson, $40 in Procter & Gamble, and keep $20 in cash for future opportunities or to cover transaction fees. This balanced approach aims for safety and steady growth."
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.Markdown object>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"display_fin_advice(\"https://finance.yahoo.com/topic/latest-news/\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "7567571d-b4c7-41be-9fd0-d65ae533a252",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.11"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user