diff --git a/week1/community-contributions/Day1_2_Reddit_Analysis/Day1_Day2_Outputs.pdf b/week1/community-contributions/Day1_2_Reddit_Analysis/Day1_Day2_Outputs.pdf new file mode 100644 index 0000000..e10cbab Binary files /dev/null and b/week1/community-contributions/Day1_2_Reddit_Analysis/Day1_Day2_Outputs.pdf differ diff --git a/week1/community-contributions/Day1_2_Reddit_Analysis/Day1_RedditAnalysis_gpt.ipynb b/week1/community-contributions/Day1_2_Reddit_Analysis/Day1_RedditAnalysis_gpt.ipynb new file mode 100644 index 0000000..6f8304e --- /dev/null +++ b/week1/community-contributions/Day1_2_Reddit_Analysis/Day1_RedditAnalysis_gpt.ipynb @@ -0,0 +1,409 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9413d98a-352a-47b7-b84b-5b4a61b3c002", + "metadata": {}, + "source": [ + "# Reddit Post Analysis" + ] + }, + { + "cell_type": "markdown", + "id": "97ebfa77-33f8-4cd1-9204-d73aeefc0fea", + "metadata": {}, + "source": [ + "1. **Sets the Role and Tone** \n", + " Instructs the AI to act as an **expert analyst** specializing in extracting insights from online forums like Reddit.\n", + "\n", + "2. **Guides Sentiment Analysis** \n", + " Asks the AI to evaluate overall sentiment (e.g., positive, neutral, negative), and to present it as approximate percentages with a brief rationale.\n", + "\n", + "3. **Groups and Labels Themes** \n", + " Instructs the AI to identify and cluster **key discussion themes**, perspectives, and emotional tones. Each theme should be explained and illustrated with **example comments**.\n", + "\n", + "4. **Creates an Insights Table** \n", + " Requests a structured table with fields like *Perspectives, Frustrations, Tools, Suggestions* to concisely summarize the discussion’s core insights.\n", + "\n", + "5. **Describes Community Dynamics** \n", + " Asks the AI to assess the **interaction style** (e.g., supportive, sarcastic, argumentative) and note any social patterns (e.g., consensus or conflict)." + ] + }, + { + "cell_type": "markdown", + "id": "425868ba-faec-4754-87f5-650f7529b319", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "#### Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9596f40f-5add-4602-91e3-cd7d2c753c33", + "metadata": {}, + "outputs": [], + "source": [ + "import praw\n", + "import os\n", + "import requests\n", + "from dotenv import load_dotenv\n", + "from IPython.display import Markdown, display, Image\n", + "from openai import OpenAI" + ] + }, + { + "cell_type": "markdown", + "id": "9e1a9999-4aad-416d-90fe-3b0841a4f455", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "#### Load Credentials" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "847843ce-ebf9-4f48-b625-82e3ed687c81", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Load environment variables in a file called .env\n", + "\n", + "load_dotenv(override=True)\n", + "api_key = os.getenv('OPENAI_API_KEY')\n", + "\n", + "# Check the key\n", + "\n", + "if not api_key:\n", + " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n", + "elif not api_key.startswith(\"sk-proj-\"):\n", + " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n", + "elif api_key.strip() != api_key:\n", + " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n", + "else:\n", + " print(\"API key found and looks good so far!\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c615d79b-55a0-4eb1-ad8b-a2e28c11b49e", + "metadata": {}, + "outputs": [], + "source": [ + "reddit = praw.Reddit(\n", + " client_id=os.getenv(\"REDDIT_CLIENT_ID\"),\n", + " client_secret=os.getenv(\"REDDIT_CLIENT_SECRET\"),\n", + " user_agent=os.getenv(\"REDDIT_USER_AGENT\"),\n", + " username=os.getenv(\"REDDIT_USERNAME\"),\n", + " password=os.getenv(\"REDDIT_PASSWORD\")\n", + ")\n", + "\n", + "print(\"Authenticated as:\", reddit.user.me())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6df2224d-ecfd-4e07-9bc8-102eff257d69", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI()" + ] + }, + { + "cell_type": "markdown", + "id": "21ba0482-79e5-45ec-81d7-8611312c6b9e", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "#### Reddit Post Scraper" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8dc5276d-2d38-4651-9db0-c353076d6096", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "class RedditPostScraper:\n", + " def __init__(self, url):\n", + " self.submission = reddit.submission(url=url)\n", + " self.submission.comments.replace_more(limit=None)\n", + " self._title = self.submission.title\n", + " self._text = self.submission.selftext\n", + " self._comments = \"\"\n", + " self._formatted_comments = [] # for reprocessing if needed\n", + "\n", + " def _generate_comments(self):\n", + " comments_list = []\n", + " for top_level in self.submission.comments:\n", + " top_author = top_level.author.name if top_level.author else \"[deleted]\"\n", + " comments_list.append(f\"{top_author}: {top_level.body}\")\n", + "\n", + " for reply in top_level.replies:\n", + " reply_author = reply.author.name if reply.author else \"[deleted]\"\n", + " comments_list.append(\n", + " f\"{reply_author} replied to {top_author}'s comment: {reply.body}\"\n", + " )\n", + " self._formatted_comments = comments_list\n", + "\n", + " def title(self):\n", + " return f\"Title:\\n{self._title}\\n{self._text}\"\n", + "\n", + " def comments(self, max_words=None):\n", + " if not self._formatted_comments:\n", + " self._generate_comments()\n", + "\n", + " output_comments = []\n", + " total_words = 0\n", + "\n", + " for comment in self._formatted_comments:\n", + " word_count = len(comment.split())\n", + " if max_words and total_words + word_count > max_words:\n", + " break\n", + " output_comments.append(comment)\n", + " total_words += word_count\n", + "\n", + " return \"Text:\\n\" + \"\\n\\n\".join(output_comments)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3121cad0-4e2c-4d78-88e2-e72c6b99e2bf", + "metadata": {}, + "outputs": [], + "source": [ + "# post = RedditPostScraper(\"https://www.reddit.com/r/running/comments/1l77osa/pushing_through_a_run/\")\n", + "# print(post.title())\n", + "# print(post.comments(2000))" + ] + }, + { + "cell_type": "markdown", + "id": "569760f6-5d68-40c1-9227-374c8e04d70a", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "#### System and User Prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22c0e89a-c076-4616-ae9b-b4cd588f39ad", + "metadata": {}, + "outputs": [], + "source": [ + "system_prompt = '''You are an expert analyst specializing in extracting insights from online discussion forums. You will be given the title of a Reddit post and a list of comments (some with replies). Your task is to analyze the sentiment of the discussion and extract structured insights that reflect the collective responses.\n", + "\n", + "Your response **must be in well-formatted Markdown**. Use clear section headers (`##`, `###`), bullet points, and tables where appropriate.\n", + "\n", + "Perform the following tasks:\n", + "\n", + "---\n", + "\n", + "## 1. Overall Sentiment Breakdown\n", + "\n", + "- Determine the overall sentiment of the responses (e.g., positive, negative, neutral, mixed).\n", + "- Express the sentiment as approximate percentages (e.g., 60% positive, 25% neutral, 15% negative).\n", + "- Provide a short explanation for why the sentiment skews this way, referring to tone, topic sensitivity, controversy, humor, or supportiveness.\n", + "\n", + "---\n", + "\n", + "## 2. Thematic Grouping of Comments\n", + "\n", + "- Identify key recurring **themes, perspectives, or discussion threads** in the comments.\n", + "- For each theme, create a subheading.\n", + "- Under each:\n", + " - Briefly describe the focus or tone of that cluster (e.g., personal stories, criticism, questions, jokes).\n", + " - Include 1–2 **example comments** using quote formatting (`>`), preferably ones with replies or high engagement.\n", + "\n", + "---\n", + "\n", + "## 3. Insights Table\n", + "\n", + "If applicable, extract and structure insights into the following table. Leave any column empty if it’s not relevant to the post type:\n", + "\n", + "| Perspectives/ Motivations | Pains/ Concerns/ Frustrations | Tools / References / Resources | Suggestions / Solutions |\n", + "|-------------------------------|----------------------------------|--------------------------------------|------------------------------------|\n", + "| - ... | - ... | - ... | - ... |\n", + "\n", + "- Populate this table with concise bullet points.\n", + "- Adapt categories to match the discussion type (e.g., switch \"Suggestions\" to \"Reactions\" if it's a news thread).\n", + "\n", + "---\n", + "\n", + "## 4. Tone and Community Dynamics\n", + "\n", + "- Comment on the **style and culture** of interaction: humor, sarcasm, empathy, trolling, intellectual debate, etc.\n", + "- Mention any noticeable social dynamics: agreement/disagreement, echo chambers, respectful debate, or hostility.\n", + "- Include casual or emotional comments if they illustrate community personality.\n", + "\n", + "---\n", + "\n", + "**Respond only in well-formatted Markdown.** Structure your output for clarity and insight, suitable for rendering in documentation, reports, or dashboards. Do not summarize every comment — focus on patterns, perspectives, and collective signals.\n", + "\n", + "'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf9d15d6-4f9a-45fd-96ed-d7097c7f03d6", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt_for(post):\n", + " user_prompt = f\"You are looking at a Reddit discussion titled:\\n\\n{post.title()}\\n\\n\"\n", + " user_prompt += \"Below are the responses from various users. Analyze them according to the system prompt provided.\\n\"\n", + " user_prompt += \"Make sure your response is structured in Markdown with headers, lists, and tables as instructed.\\n\\n\"\n", + " user_prompt += post.comments(4000)\n", + " return user_prompt\n" + ] + }, + { + "cell_type": "markdown", + "id": "f18c581c-ea30-4a43-9223-8c184dedb37e", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "#### Generating Responses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aadf8f41-aca3-41be-b18b-cb49a67ba256", + "metadata": {}, + "outputs": [], + "source": [ + "def messages_for(website):\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "feac9c61-f1f8-48f0-9189-bc60ac7fd755", + "metadata": {}, + "outputs": [], + "source": [ + "def summarize(url):\n", + " website = RedditPostScraper(url)\n", + " response = openai.chat.completions.create(\n", + " model = \"gpt-4o-mini\",\n", + " messages = messages_for(website)\n", + " )\n", + " return response.choices[0].message.content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12b1d6dd-2d62-4136-8b8e-0a92134d4261", + "metadata": {}, + "outputs": [], + "source": [ + "# summarize(\"https://www.reddit.com/r/running/comments/1l77osa/pushing_through_a_run/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd48253d-cdca-4c29-b4f2-c470290de63b", + "metadata": {}, + "outputs": [], + "source": [ + "def display_summary(url):\n", + " summary = summarize(url)\n", + " display(Markdown(summary))" + ] + }, + { + "cell_type": "markdown", + "id": "7e0825a9-a3b0-43a0-b69c-cf0ce81d77d2", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "#### Example Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a61a482-ec70-4e29-b99c-0d82298a32b1", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://www.reddit.com/r/running/comments/1l77osa/pushing_through_a_run/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a336777-a06e-4535-b68d-a6470eb1d701", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://www.reddit.com/r/AskReddit/comments/1lam10k/how_do_you_feel_about_the_no_kings_protest/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6b12074-ffb6-4a6d-bdd2-bbbb78f82781", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://www.reddit.com/r/canada/comments/1laq8ok/donald_trump_is_a_convicted_felon_could_he_be/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63b805e5-183f-439b-bfe7-9ee6bbe4a5b4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week1/community-contributions/Day1_2_Reddit_Analysis/README.md b/week1/community-contributions/Day1_2_Reddit_Analysis/README.md new file mode 100644 index 0000000..3c2f2ed --- /dev/null +++ b/week1/community-contributions/Day1_2_Reddit_Analysis/README.md @@ -0,0 +1,59 @@ +# Reddit Post Analyzer – GPT & Open Source Approaches + +This project consists of two Jupyter notebooks that demonstrate different methods for analyzing Reddit post data: + +- **Day 1:** `Day1_RedditAnalysis_gpt.ipynb` – Uses GPT-based sentiment and insight extraction from Reddit posts and comments. +- **Day 2:** `day2_RedditAnalysis_opensource.ipynb` – Implements an open-source alternative for Reddit data processing and basic sentiment/thematic analysis. + +--- + +## 📌 Features + +- Reddit post and comment scraping using PRAW +- GPT-based sentiment summarization and insight structuring (Day 1) +- Open-source sentiment and thematic analysis pipeline (Day 2) +- Markdown-formatted output suitable for reporting + +--- + +## 🛠️ Setup Instructions + +### Reddit API Credentials Setup + +To access Reddit data, you need to create a Reddit app and obtain credentials: + +#### Steps to Get Your Reddit API Keys: + +1. Go to [https://www.reddit.com/prefs/apps](https://www.reddit.com/prefs/apps). +2. Scroll to the bottom and click **“create another app”** or **“create app”**. +3. Choose the **“script”** option. +4. Fill in the following fields: + - **name:** e.g., Reddit Analyzer + - **redirect uri:** `http://localhost:8080` + - **description:** *(optional)* +5. After creating the app, you will get: + - **client ID** (displayed under the app name) + - **client secret** +6. Keep note of your Reddit **username** and **password** (these are used with script apps) + +#### Store your credentials in a `.env` file: + +Create a `.env` file in the root directory with the following format: + +```env +REDDIT_CLIENT_ID=your_client_id +REDDIT_CLIENT_SECRET=your_client_secret +REDDIT_USER_AGENT=your_custom_user_agent +REDDIT_USERNAME=your_reddit_username +REDDIT_PASSWORD=your_reddit_password +``` + +These will be securely loaded into your script using the `dotenv` package. + +--- + +## 🚀 Running the Notebooks + +Make sure to activate your virtual environment (if applicable), install dependencies, and run the notebooks cell by cell in **Jupyter Lab** or **VS Code**. + +--- diff --git a/week1/community-contributions/Day1_2_Reddit_Analysis/day2_RedditAnalysis_opensource.ipynb b/week1/community-contributions/Day1_2_Reddit_Analysis/day2_RedditAnalysis_opensource.ipynb new file mode 100644 index 0000000..1010512 --- /dev/null +++ b/week1/community-contributions/Day1_2_Reddit_Analysis/day2_RedditAnalysis_opensource.ipynb @@ -0,0 +1,436 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8c22d46c-d08b-4dbd-bdf5-338adce95e1a", + "metadata": {}, + "source": [ + "# Reddit Post Analysis using open source models (llama 3.2, deepseek r1, mistral:7b)" + ] + }, + { + "cell_type": "markdown", + "id": "bfc5335b-53a8-4cd1-b1a8-95496ae4856d", + "metadata": {}, + "source": [ + "1. **Sets the Role and Tone** \n", + " Instructs the AI to act as an **expert analyst** specializing in extracting insights from online forums like Reddit.\n", + "\n", + "2. **Guides Sentiment Analysis** \n", + " Asks the AI to evaluate overall sentiment (e.g., positive, neutral, negative), and to present it as approximate percentages with a brief rationale.\n", + "\n", + "3. **Groups and Labels Themes** \n", + " Instructs the AI to identify and cluster **key discussion themes**, perspectives, and emotional tones. Each theme should be explained and illustrated with **example comments**.\n", + "\n", + "4. **Creates an Insights Table** \n", + " Requests a structured table with fields like *Perspectives, Frustrations, Tools, Suggestions* to concisely summarize the discussion’s core insights.\n", + "\n", + "5. **Describes Community Dynamics** \n", + " Asks the AI to assess the **interaction style** (e.g., supportive, sarcastic, argumentative) and note any social patterns (e.g., consensus or conflict)." + ] + }, + { + "cell_type": "markdown", + "id": "6104a23f-c43a-48dc-a018-cddb8bea75d1", + "metadata": {}, + "source": [ + "#### Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd", + "metadata": {}, + "outputs": [], + "source": [ + "import praw\n", + "import os\n", + "from dotenv import load_dotenv\n", + "from IPython.display import Markdown, display\n", + "from openai import OpenAI\n", + "import ollama" + ] + }, + { + "cell_type": "markdown", + "id": "07de5c1d-1930-49ca-a026-2265e5432327", + "metadata": {}, + "source": [ + "#### Load Credentials" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "83fdd570-83a3-4e18-a94e-969c557978d3", + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv(override=True)\n", + "reddit = praw.Reddit(\n", + " client_id=os.getenv(\"REDDIT_CLIENT_ID\"),\n", + " client_secret=os.getenv(\"REDDIT_CLIENT_SECRET\"),\n", + " user_agent=os.getenv(\"REDDIT_USER_AGENT\"),\n", + " username=os.getenv(\"REDDIT_USERNAME\"),\n", + " password=os.getenv(\"REDDIT_PASSWORD\")\n", + ")\n", + "\n", + "print(\"Authenticated as:\", reddit.user.me())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a8a58d8-6755-4e22-be97-232c2f7ea07c", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI()" + ] + }, + { + "cell_type": "markdown", + "id": "f6b5b086-a4aa-40d2-a721-b3b8781d7ccf", + "metadata": {}, + "source": [ + "#### Reddit Post Scraper" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09c7a428-db62-4353-9fa5-d12bbdc4477c", + "metadata": {}, + "outputs": [], + "source": [ + "class RedditPostScraper:\n", + " def __init__(self, url):\n", + " self.submission = reddit.submission(url=url)\n", + " self.submission.comments.replace_more(limit=None)\n", + " self._title = self.submission.title\n", + " self._text = self.submission.selftext\n", + " self._comments = \"\"\n", + " self._formatted_comments = [] # for reprocessing if needed\n", + "\n", + " def _generate_comments(self):\n", + " comments_list = []\n", + " for top_level in self.submission.comments:\n", + " top_author = top_level.author.name if top_level.author else \"[deleted]\"\n", + " comments_list.append(f\"{top_author}: {top_level.body}\")\n", + "\n", + " for reply in top_level.replies:\n", + " reply_author = reply.author.name if reply.author else \"[deleted]\"\n", + " comments_list.append(\n", + " f\"{reply_author} replied to {top_author}'s comment: {reply.body}\"\n", + " )\n", + " self._formatted_comments = comments_list\n", + "\n", + " def title(self):\n", + " return f\"Title:\\n{self._title}\\n{self._text}\"\n", + "\n", + " def comments(self, max_words=None):\n", + " if not self._formatted_comments:\n", + " self._generate_comments()\n", + "\n", + " output_comments = []\n", + " total_words = 0\n", + "\n", + " for comment in self._formatted_comments:\n", + " word_count = len(comment.split())\n", + " if max_words and total_words + word_count > max_words:\n", + " break\n", + " output_comments.append(comment)\n", + " total_words += word_count\n", + "\n", + " return \"Text:\\n\" + \"\\n\\n\".join(output_comments)" + ] + }, + { + "cell_type": "markdown", + "id": "3cece64a-ca54-4961-b04e-40f8057e2e78", + "metadata": {}, + "source": [ + "#### System and User Prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "029de240-398e-4339-b90c-e6e90a96bcb5", + "metadata": {}, + "outputs": [], + "source": [ + "system_prompt = '''You are an expert analyst specializing in extracting insights from online discussion forums. You will be given the title of a Reddit post and a list of comments (some with replies). Your task is to analyze the sentiment of the discussion and extract structured insights that reflect the collective responses.\n", + "Your response **must be in well-formatted Markdown**. Use clear section headers (`##`, `###`), bullet points, and tables where appropriate.\n", + "Perform the following tasks:\n", + "---\n", + "## 1. Overall Sentiment Breakdown\n", + "- Determine the overall sentiment of the responses (e.g., positive, negative, neutral, mixed).\n", + "- Express the sentiment as approximate percentages (e.g., 60% positive, 25% neutral, 15% negative).\n", + "- Provide a short explanation for why the sentiment skews this way, referring to tone, topic sensitivity, controversy, humor, or supportiveness.\n", + "---\n", + "## 2. Thematic Grouping of Comments\n", + "- Identify key recurring **themes, perspectives, or discussion threads** in the comments.\n", + "- For each theme, create a subheading.\n", + "- Under each:\n", + " - Briefly describe the focus or tone of that cluster (e.g., personal stories, criticism, questions, jokes).\n", + " - Include 1–2 **example comments** using quote formatting (`>`), preferably ones with replies or high engagement.\n", + "---\n", + "## 3. Insights Table\n", + "If applicable, extract and structure insights into the following table. Leave any column empty if it’s not relevant to the post type:\n", + "| Perspectives/ Motivations | Pains/ Concerns/ Frustrations | Tools / References / Resources | Suggestions / Solutions |\n", + "|-------------------------------|----------------------------------|--------------------------------------|------------------------------------|\n", + "| - ... | - ... | - ... | - ... |\n", + "- Populate this table with concise bullet points.\n", + "- Adapt categories to match the discussion type (e.g., switch \"Suggestions\" to \"Reactions\" if it's a news thread).\n", + "---\n", + "## 4. Tone and Community Dynamics\n", + "- Comment on the **style and culture** of interaction: humor, sarcasm, empathy, trolling, intellectual debate, etc.\n", + "- Mention any noticeable social dynamics: agreement/disagreement, echo chambers, respectful debate, or hostility.\n", + "- Include casual or emotional comments if they illustrate community personality.\n", + "---\n", + "**Respond only in well-formatted Markdown.** Structure your output for clarity and insight, suitable for rendering in documentation, reports, or dashboards. Do not summarize every comment — focus on patterns, perspectives, and collective signals.\n", + "\n", + "'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "350d8eea-005b-474e-9b57-cdb4004d8144", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt_for(post):\n", + " user_prompt = f\"You are looking at a Reddit discussion titled:\\n\\n{post.title()}\\n\\n\"\n", + " user_prompt += \"Below are the responses from various users. Analyze them according to the system prompt provided.\\n\"\n", + " user_prompt += \"Make sure your response is structured in Markdown with headers, lists, and tables as instructed.\\n\\n\"\n", + " user_prompt += post.comments(1000)\n", + " return user_prompt\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf23ed3b-8583-444e-ac62-3d415f771462", + "metadata": {}, + "outputs": [], + "source": [ + "# post = RedditPostScraper(\"https://www.reddit.com/r/running/comments/1l77osa/pushing_through_a_run/\")\n", + "# print(post.title())\n", + "# print(post.comments())" + ] + }, + { + "cell_type": "markdown", + "id": "4e37f2e1-6eef-4c27-a442-97a6ff3dbf2a", + "metadata": {}, + "source": [ + "#### Generating messages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0781921b-e4e0-49f8-b34a-fd1017be6150", + "metadata": {}, + "outputs": [], + "source": [ + "def messages_for(website):\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n", + " ]" + ] + }, + { + "cell_type": "markdown", + "id": "544c81a2-37c2-491e-8ef4-ac5d56173b72", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "#### llama 3.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3dd0a2a-ddf2-4bd1-823d-b49fa44a09ec", + "metadata": {}, + "outputs": [], + "source": [ + "ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n", + "def summarizellama(url):\n", + " website = RedditPostScraper(url)\n", + " response = ollama_via_openai.chat.completions.create(\n", + " model = \"llama3.2\",\n", + " messages = messages_for(website)\n", + " )\n", + " return response.choices[0].message.content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "717ccb6d-f6c9-4f36-ad69-686f3f1bd26b", + "metadata": {}, + "outputs": [], + "source": [ + "def display_summaryllama(url):\n", + " summary = summarizellama(url)\n", + " display(Markdown(summary))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f981fe9-ed2d-4546-8fb3-c0f8048e3474", + "metadata": {}, + "outputs": [], + "source": [ + "display_summaryllama(\"https://www.reddit.com/r/running/comments/1l77osa/pushing_through_a_run/\")" + ] + }, + { + "cell_type": "markdown", + "id": "e3091dcf-f8b3-4d1a-a85c-3a9ebed2ac6c", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "#### deepseek" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55e465fa-e29d-4ed3-8f44-71964d2f866b", + "metadata": {}, + "outputs": [], + "source": [ + "ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n", + "def summarizedeepseek(url):\n", + " website = RedditPostScraper(url)\n", + " response = ollama_via_openai.chat.completions.create(\n", + " model = \"deepseek-r1\",\n", + " messages = messages_for(website)\n", + " )\n", + " return response.choices[0].message.content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40c26a89-97a8-4883-857a-fb13fea9222d", + "metadata": {}, + "outputs": [], + "source": [ + "def display_summarydeepseek(url):\n", + " summary = summarizedeepseek(url)\n", + " display(Markdown(summary))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "362b871e-8f4d-47fa-b01d-bbe3082dd271", + "metadata": {}, + "outputs": [], + "source": [ + "display_summarydeepseek(\"https://www.reddit.com/r/running/comments/1l77osa/pushing_through_a_run/\")" + ] + }, + { + "cell_type": "markdown", + "id": "3841bb1e-e885-4cb5-88f6-b6698ccbb77f", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "#### Mistral" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d913e07-31b4-439d-a861-c4fd99012588", + "metadata": {}, + "outputs": [], + "source": [ + "!ollama pull mistral:7b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab881745-990c-4158-935b-36075c1dacde", + "metadata": {}, + "outputs": [], + "source": [ + "ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n", + "def summarizeMistral(url):\n", + " website = RedditPostScraper(url)\n", + " response = ollama_via_openai.chat.completions.create(\n", + " model = \"mistral:7b\",\n", + " messages = messages_for(website)\n", + " )\n", + " return response.choices[0].message.content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5de3db6-ba69-43e8-9f6c-0945dbafa308", + "metadata": {}, + "outputs": [], + "source": [ + "def display_summaryMistral(url):\n", + " summary = summarizeMistral(url)\n", + " display(Markdown(summary))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ea97e30-44be-45dc-ad2f-b6951ecc0190", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "display_summaryMistral(\"https://www.reddit.com/r/running/comments/1l77osa/pushing_through_a_run/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38e4aabe-b111-4ddb-af6c-6d4ff7d6f26b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}