Bootcamp: Solisoma (fix: edit only community_contributions folder)
@@ -0,0 +1,488 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "6df489a5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# week1 -> day1\n",
|
||||||
|
"import os\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from IPython.display import Markdown, display, update_display\n",
|
||||||
|
"from openai import OpenAI\n",
|
||||||
|
"\n",
|
||||||
|
"#week2 -> day2\n",
|
||||||
|
"import gradio as gr"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"id": "8e7fbf42",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"load_dotenv(override=True)\n",
|
||||||
|
"api_key:str = os.getenv('OPENAI_API_KEY')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 14,
|
||||||
|
"id": "b9266d13",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"class SolveTechnicalQuestions:\n",
|
||||||
|
" _system_prompt = \"\"\"\n",
|
||||||
|
"    You are a snarky assistant that analyzes the contents of a website, \n",
|
||||||
|
" and provides a short, snarky, humorous summary, ignoring text that might be navigation related.\n",
|
||||||
|
" Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
" def __init__(self, model: str = \"gpt-4o-mini\") -> None:\n",
|
||||||
|
" self.openai_client = OpenAI()\n",
|
||||||
|
" self._MODEL = model\n",
|
||||||
|
"\n",
|
||||||
|
" def get_user_technical_question_prompt(self, question:str):\n",
|
||||||
|
" prompt = f\"\"\"\n",
|
||||||
|
"        Answer this technical question comprehensively:\n",
|
||||||
|
" Provide:\n",
|
||||||
|
" 1. A clear, accurate answer\n",
|
||||||
|
" 2. Code examples if relevant\n",
|
||||||
|
" 3. Best practices and recommendations\n",
|
||||||
|
" 4. Potential pitfalls or considerations\n",
|
||||||
|
" 5. Additional resources or references if helpful\n",
|
||||||
|
"\n",
|
||||||
|
" Format your response in a structured, easy-to-read manner.\n",
|
||||||
|
"\n",
|
||||||
|
"        Question: {question}\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
" return prompt\n",
|
||||||
|
" \n",
|
||||||
|
" def set_system_prompt(self, system_prompt: str) -> None:\n",
|
||||||
|
" self._system_prompt = system_prompt\n",
|
||||||
|
" \n",
|
||||||
|
" def set_endpoint(self, endpoint: str, api_key: str = \"ollama\") -> None:\n",
|
||||||
|
" self.openai_client = OpenAI(base_url=endpoint, api_key=api_key)\n",
|
||||||
|
"\n",
|
||||||
|
" def set_model(self, model: str) -> None:\n",
|
||||||
|
" self._MODEL = model\n",
|
||||||
|
"\n",
|
||||||
|
" def start(self, stream=False):\n",
|
||||||
|
" try:\n",
|
||||||
|
" while True:\n",
|
||||||
|
" question = input(\">>> \")\n",
|
||||||
|
" \n",
|
||||||
|
" if question.strip().lower() in ['quit', 'exit', 'q']:\n",
|
||||||
|
" print(\"Goodbye!\")\n",
|
||||||
|
" break\n",
|
||||||
|
" \n",
|
||||||
|
" if not question.strip():\n",
|
||||||
|
" print(\"Please enter a question.\")\n",
|
||||||
|
" continue\n",
|
||||||
|
" \n",
|
||||||
|
" message = self.get_user_technical_question_prompt(question.strip())\n",
|
||||||
|
" \n",
|
||||||
|
" response = self.openai_client.chat.completions.create(\n",
|
||||||
|
" model=self._MODEL, \n",
|
||||||
|
" messages=[\n",
|
||||||
|
" {\"role\": \"system\", \"content\": self._system_prompt},\n",
|
||||||
|
" {\"role\": \"user\", \"content\": message},\n",
|
||||||
|
" ],\n",
|
||||||
|
" stream=stream\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" if stream:\n",
|
||||||
|
" full_response = \"\"\n",
|
||||||
|
" display_handle = display(Markdown(full_response), display_id=True)\n",
|
||||||
|
" for chunk in response:\n",
|
||||||
|
" if chunk.choices[0].delta.content:\n",
|
||||||
|
" full_response += chunk.choices[0].delta.content\n",
|
||||||
|
" update_display(Markdown(full_response), display_id=display_handle.display_id)\n",
|
||||||
|
" full_response += \"\\n\"\n",
|
||||||
|
" update_display(Markdown(full_response), display_id=display_handle.display_id)\n",
|
||||||
|
" else:\n",
|
||||||
|
" full_response = response.choices[0].message.content\n",
|
||||||
|
" display(Markdown(full_response))\n",
|
||||||
|
" \n",
|
||||||
|
" except KeyboardInterrupt:\n",
|
||||||
|
" print(\"\\nGoodbye!\")\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" print(f\"Error: {e}\")\n",
|
||||||
|
"\n",
|
||||||
|
" def start_with_gradio(self, question:str, stream=False):\n",
|
||||||
|
" if not question.strip():\n",
|
||||||
|
" return \"Please enter a question.\"\n",
|
||||||
|
" \n",
|
||||||
|
" message = self.get_user_technical_question_prompt(question.strip())\n",
|
||||||
|
" \n",
|
||||||
|
" response = self.openai_client.chat.completions.create(\n",
|
||||||
|
" model=self._MODEL, \n",
|
||||||
|
" messages=[\n",
|
||||||
|
" {\"role\": \"system\", \"content\": self._system_prompt},\n",
|
||||||
|
" {\"role\": \"user\", \"content\": message},\n",
|
||||||
|
" ],\n",
|
||||||
|
" stream=stream\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" if stream:\n",
|
||||||
|
" full_response = \"\"\n",
|
||||||
|
" for chunk in response:\n",
|
||||||
|
" if chunk.choices[0].delta.content:\n",
|
||||||
|
" full_response += chunk.choices[0].delta.content\n",
|
||||||
|
" yield full_response\n",
|
||||||
|
" full_response += \"\\n\"\n",
|
||||||
|
" yield full_response\n",
|
||||||
|
" else:\n",
|
||||||
|
" yield response.choices[0].message.content\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"id": "0bddb2e5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"TECHNICAL_SYSTEM_PROMPT = \"\"\"\n",
|
||||||
|
"You are an expert technical assistant with deep knowledge in:\n",
|
||||||
|
"\n",
|
||||||
|
"PROGRAMMING & DEVELOPMENT:\n",
|
||||||
|
"- Python, JavaScript, Java, C++, Go, Rust, TypeScript\n",
|
||||||
|
"- Web development (React, Vue, Angular, Node.js)\n",
|
||||||
|
"- Mobile development (iOS, Android, Flutter)\n",
|
||||||
|
"- DevOps (Docker, Kubernetes, CI/CD, AWS, Azure, GCP)\n",
|
||||||
|
"- Database systems (SQL, NoSQL, PostgreSQL, MongoDB)\n",
|
||||||
|
"- Software architecture patterns and best practices\n",
|
||||||
|
"\n",
|
||||||
|
"SYSTEMS & INFRASTRUCTURE:\n",
|
||||||
|
"- Operating systems (Linux, Windows, macOS)\n",
|
||||||
|
"- Networking protocols and security\n",
|
||||||
|
"- Cloud computing and distributed systems\n",
|
||||||
|
"- Monitoring, logging, and observability\n",
|
||||||
|
"- Performance optimization and scaling\n",
|
||||||
|
"\n",
|
||||||
|
"AI & MACHINE LEARNING:\n",
|
||||||
|
"- Machine learning algorithms and frameworks\n",
|
||||||
|
"- Deep learning (TensorFlow, PyTorch)\n",
|
||||||
|
"- Natural language processing\n",
|
||||||
|
"- Computer vision and image processing\n",
|
||||||
|
"- MLOps and model deployment\n",
|
||||||
|
"\n",
|
||||||
|
"RESPONSE GUIDELINES:\n",
|
||||||
|
"1. Provide accurate, up-to-date technical information\n",
|
||||||
|
"2. Include code examples when relevant\n",
|
||||||
|
"3. Explain complex concepts clearly\n",
|
||||||
|
"4. Suggest best practices and alternatives\n",
|
||||||
|
"5. Warn about potential pitfalls or security issues\n",
|
||||||
|
"6. Reference official documentation when appropriate\n",
|
||||||
|
"\n",
|
||||||
|
"Always prioritize accuracy and practical applicability in your technical responses.\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"Chat = SolveTechnicalQuestions()\n",
|
||||||
|
"Chat.set_system_prompt(TECHNICAL_SYSTEM_PROMPT)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "c8675757",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/markdown": [
|
||||||
|
"## Understanding the `for` Loop\n",
|
||||||
|
"\n",
|
||||||
|
"### 1. Clear and Accurate Answer\n",
|
||||||
|
"\n",
|
||||||
|
"A `for` loop is a control flow statement used in programming to iterate over a sequence (like a list, tuple, string, or range) or perform a task a specific number of times. It allows you to execute a block of code repeatedly, which is crucial for automating repetitive tasks.\n",
|
||||||
|
"\n",
|
||||||
|
"### 2. Code Examples\n",
|
||||||
|
"\n",
|
||||||
|
"#### Python Example\n",
|
||||||
|
"\n",
|
||||||
|
"```python\n",
|
||||||
|
"# Iterate over a list\n",
|
||||||
|
"numbers = [1, 2, 3, 4, 5]\n",
|
||||||
|
"for number in numbers:\n",
|
||||||
|
" print(number)\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"#### JavaScript Example\n",
|
||||||
|
"\n",
|
||||||
|
"```javascript\n",
|
||||||
|
"// Iterate over an array\n",
|
||||||
|
"const numbers = [1, 2, 3, 4, 5];\n",
|
||||||
|
"for (let number of numbers) {\n",
|
||||||
|
" console.log(number);\n",
|
||||||
|
"}\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"#### Java Example\n",
|
||||||
|
"\n",
|
||||||
|
"```java\n",
|
||||||
|
"// Iterate over an array\n",
|
||||||
|
"int[] numbers = {1, 2, 3, 4, 5};\n",
|
||||||
|
"for (int number : numbers) {\n",
|
||||||
|
" System.out.println(number);\n",
|
||||||
|
"}\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"### 3. Best Practices and Recommendations\n",
|
||||||
|
"\n",
|
||||||
|
"- **Use Descriptive Variable Names:** This improves code readability. Avoid vague names like `i` or `j`, unless they are commonly used as loop counters.\n",
|
||||||
|
" \n",
|
||||||
|
"- **Limit Loop Complexity:** Ensure that the logic inside the loop is straightforward. If the loop gets complicated, consider refactoring or extracting the logic into a separate function.\n",
|
||||||
|
"\n",
|
||||||
|
"- **Control Iteration with Care:** If you're iterating through large datasets, be mindful of performance impacts and consider alternatives (like list comprehensions in Python).\n",
|
||||||
|
"\n",
|
||||||
|
"### 4. Potential Pitfalls or Considerations\n",
|
||||||
|
"\n",
|
||||||
|
"- **Off-by-One Errors:** These are common when dealing with loop boundaries. Always double-check loop conditions to ensure you don’t miss elements or go out of range.\n",
|
||||||
|
"\n",
|
||||||
|
"- **Infinite Loops:** Ensure that your loop has a condition that eventually becomes false, or it could result in an infinite loop, causing your program to hang.\n",
|
||||||
|
"\n",
|
||||||
|
"- **Modifying the Loop Variable:** Changing the loop variable within the loop’s body can lead to unexpected behaviors, especially in languages like Python.\n",
|
||||||
|
"\n",
|
||||||
|
"### 5. Additional Resources or References\n",
|
||||||
|
"\n",
|
||||||
|
"- [Python for Loop Documentation](https://docs.python.org/3/reference/compound_stmts.html#for)\n",
|
||||||
|
"- [JavaScript for Loop Documentation](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for)\n",
|
||||||
|
"- [Java for Loop Documentation](https://docs.oracle.com/javase/tutorial/java/nutsandbolts/ch04.html#for)\n",
|
||||||
|
"\n",
|
||||||
|
"These resources provide in-depth explanations and examples for different programming languages, and can be useful for further learning about `for` loops and loops in general.\n"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.Markdown object>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Goodbye!\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Set stream to true to allow streaming of the response\n",
|
||||||
|
"# It Mimics REPL\n",
|
||||||
|
"# After running look up to see a terminal where you put in your question\n",
|
||||||
|
"Chat.start(stream=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"id": "7a086b95",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/markdown": [
|
||||||
|
"### What is an `if` Statement?\n",
|
||||||
|
"\n",
|
||||||
|
"An `if` statement is a fundamental control flow statement in programming that allows you to execute a block of code based on a specified condition. If the condition evaluates to `true`, the block of code will execute; otherwise, it will be skipped.\n",
|
||||||
|
"\n",
|
||||||
|
"#### 1. A Clear, Accurate Answer\n",
|
||||||
|
"\n",
|
||||||
|
"In programming, the `if` statement checks a condition. If the condition is `true`, the code inside the `if` block is executed. If the condition is `false`, the block is ignored. \n",
|
||||||
|
"\n",
|
||||||
|
"Here’s the basic syntax in Python and JavaScript as examples:\n",
|
||||||
|
"\n",
|
||||||
|
"**Python Syntax:**\n",
|
||||||
|
"```python\n",
|
||||||
|
"if condition:\n",
|
||||||
|
" # Code to execute if condition is true\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"**JavaScript Syntax:**\n",
|
||||||
|
"```javascript\n",
|
||||||
|
"if (condition) {\n",
|
||||||
|
" // Code to execute if condition is true\n",
|
||||||
|
"}\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"#### 2. Code Examples\n",
|
||||||
|
"\n",
|
||||||
|
"**Python Example:**\n",
|
||||||
|
"```python\n",
|
||||||
|
"temperature = 30\n",
|
||||||
|
"\n",
|
||||||
|
"if temperature > 25:\n",
|
||||||
|
" print(\"It's a hot day!\")\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"**JavaScript Example:**\n",
|
||||||
|
"```javascript\n",
|
||||||
|
"let temperature = 30;\n",
|
||||||
|
"\n",
|
||||||
|
"if (temperature > 25) {\n",
|
||||||
|
" console.log(\"It's a hot day!\");\n",
|
||||||
|
"}\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"In both examples, if the `temperature` variable is greater than 25, the corresponding message will be printed to the console.\n",
|
||||||
|
"\n",
|
||||||
|
"#### 3. Best Practices and Recommendations\n",
|
||||||
|
"\n",
|
||||||
|
"- **Use Clear Conditions**: Ensure that the condition being evaluated is clear and understandable. \n",
|
||||||
|
"- **Avoid Complex Conditions**: If conditions become too complex, consider breaking them down into multiple `if` statements or using logical operators for clarity.\n",
|
||||||
|
"- **Indentation**: Properly indent your code blocks. This improves readability and maintainability.\n",
|
||||||
|
"- **Use `elif`/`else if` for Multiple Conditions**: When evaluating multiple conditions, use `elif` (Python) or `else if` (JavaScript) to make the logic cleaner.\n",
|
||||||
|
" \n",
|
||||||
|
" **Example with `elif`:**\n",
|
||||||
|
" ```python\n",
|
||||||
|
" score = 85\n",
|
||||||
|
"\n",
|
||||||
|
" if score >= 90:\n",
|
||||||
|
" print(\"Grade: A\")\n",
|
||||||
|
" elif score >= 80:\n",
|
||||||
|
" print(\"Grade: B\")\n",
|
||||||
|
" else:\n",
|
||||||
|
" print(\"Grade: C\")\n",
|
||||||
|
" ```\n",
|
||||||
|
"\n",
|
||||||
|
"#### 4. Potential Pitfalls or Considerations\n",
|
||||||
|
"\n",
|
||||||
|
"- **Boolean Context**: Ensure that the condition evaluates to a boolean (`true` or `false`). Improper conditions could result in unexpected behavior.\n",
|
||||||
|
"- **Missing `else` or `elif`**: If not handled correctly, cases that fall outside the specified conditions may go unnoticed. Consider using an `else` statement to capture any situations not defined in prior conditions.\n",
|
||||||
|
"- **Short-Circuit Evaluation**: In languages like Python and JavaScript, using logical operators (`and`, `or`) as conditions can lead to short-circuit evaluation, which might affect the execution of your code. Be cautious about using these in conditions.\n",
|
||||||
|
"\n",
|
||||||
|
"#### 5. Additional Resources or References\n",
|
||||||
|
"\n",
|
||||||
|
"- **Python Documentation on `if` Statements**: [Python If Statement](https://docs.python.org/3/tutorial/controlflow.html#if-statements)\n",
|
||||||
|
"- **JavaScript Documentation on Conditional Statements**: [MDN Web Docs - Conditionals](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Control_flow_and_error_handling#conditional_statements)\n",
|
||||||
|
"\n",
|
||||||
|
"Understanding how `if` statements work is crucial for implementing decision-making logic in your programs, enabling dynamic behavior based on varying conditions."
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.Markdown object>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Goodbye!\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Set stream to false to get a single response\n",
|
||||||
|
"Chat.start(stream=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "95daf1f1",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Ignore if you don't want to use ollama\n",
|
||||||
|
"# Here shows the ability to switch from one endpoint to another\n",
|
||||||
|
"Chat.set_endpoint(\"http://localhost:11434/v1\")\n",
|
||||||
|
"Chat.set_model(\"llama3.2\")\n",
|
||||||
|
"\n",
|
||||||
|
"Chat.start(stream=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"id": "c7d66ef7",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"* Running on local URL: http://127.0.0.1:7861\n",
|
||||||
|
"* To create a public link, set `share=True` in `launch()`.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.HTML object>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": []
|
||||||
|
},
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"gr_output = gr.Markdown(label=\"Response\")\n",
|
||||||
|
"stream_input = gr.Checkbox(label='Stream', value=False)\n",
|
||||||
|
"question_input = gr.Textbox(label=\"Question\", info=\"Ask it any technical question\", lines=1)\n",
|
||||||
|
"\n",
|
||||||
|
"interface = gr.Interface(\n",
|
||||||
|
" fn=Chat.start_with_gradio, \n",
|
||||||
|
" title=\"ChatGPT\", \n",
|
||||||
|
" inputs=[question_input, stream_input], \n",
|
||||||
|
" outputs=[gr_output], \n",
|
||||||
|
" flagging_mode=\"never\"\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"interface.launch()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "ed776b93",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.12"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
716
week1/community-contributions/solisoma/week1_exercises.ipynb
Normal file
@@ -0,0 +1,716 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "7dbe3347",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# 🚀 Advanced Web Scraping & AI Assistant - Week 1 Complete Exercise\n",
|
||||||
|
"\n",
|
||||||
|
"## 📋 **Notebook Overview**\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook demonstrates the **complete evolution** of a web scraping solution through **Week 1** of the LLM Engineering course.\n",
|
||||||
|
"\n",
|
||||||
|
"### **Exercise Progression:**\n",
|
||||||
|
"- **Cells 1-7**: Week 1 Day 1 (basic scraping + AI)\n",
|
||||||
|
"- **Cell 8**: Week 1 Day 2 (Ollama integration) \n",
|
||||||
|
"- **Cells 9-13**: Week 1 Day 5 (advanced features + brochure generation)\n",
|
||||||
|
"\n",
|
||||||
|
"### **Key Learning Progression:**\n",
|
||||||
|
"1. **Day 1**: JavaScript scraping problem → Selenium solution\n",
|
||||||
|
"2. **Day 2**: Remote ↔ Local AI flexibility (OpenAI ↔ Ollama)\n",
|
||||||
|
"3. **Day 5**: Multi-page intelligence + business automation\n",
|
||||||
|
"\n",
|
||||||
|
"### **Technical Skills:**\n",
|
||||||
|
"- Selenium WebDriver, OpenAI API, Ollama, JSON processing, Class inheritance, Streaming responses\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "9addd8d1",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# week1 -> day1\n",
|
||||||
|
"import os\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from selenium.webdriver.common.by import By\n",
|
||||||
|
"from selenium.webdriver.chrome.options import Options\n",
|
||||||
|
"from selenium import webdriver\n",
|
||||||
|
"from IPython.display import Markdown, display, update_display\n",
|
||||||
|
"from openai import OpenAI\n",
|
||||||
|
"\n",
|
||||||
|
"# week1 -> day5\n",
|
||||||
|
"import json\n",
|
||||||
|
"from typing import Dict, List\n",
|
||||||
|
"\n",
|
||||||
|
"#week2 -> day2\n",
|
||||||
|
"import gradio as gr"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "85bf7734",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 📦 **Dependencies**\n",
|
||||||
|
"\n",
|
||||||
|
"**Week 1 Day 1**: Core scraping + AI integration\n",
|
||||||
|
"**Week 1 Day 5**: Added JSON processing + type hints\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "f881e916",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## **Environment Setup**\n",
|
||||||
|
"\n",
|
||||||
|
"This cell loads the OpenAI API key from the `.env` file. The `override=True` parameter ensures that any existing environment variables are replaced with values from the `.env` file.\n",
|
||||||
|
"\n",
|
||||||
|
"**Important**: Make sure you have a `.env` file in your project root with:\n",
|
||||||
|
"```\n",
|
||||||
|
"OPENAI_API_KEY=your-actual-api-key-here\n",
|
||||||
|
"```\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "7123ba55",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"load_dotenv(override=True)\n",
|
||||||
|
"api_key:str = os.getenv('OPENAI_API_KEY')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "ab17f1a7",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 🏗️ **WebpageSummarizer Class**\n",
|
||||||
|
"\n",
|
||||||
|
"**Day 1**: Basic scraping + AI integration\n",
|
||||||
|
"**Day 2**: Remote ↔ Local flexibility (`set_endpoint`, `set_model`)\n",
|
||||||
|
"**Day 5**: Multi-page intelligence + brochure generation\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"id": "5e9cdf71",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"class WebpageSummarizer:\n",
|
||||||
|
" # week1 -> day1\n",
|
||||||
|
" _system_prompt = \"\"\"\n",
|
||||||
|
"    You are a snarky assistant that analyzes the contents of a website, \n",
|
||||||
|
" and provides a short, snarky, humorous summary, ignoring text that might be navigation related.\n",
|
||||||
|
" Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" \n",
|
||||||
|
" # week1 -> day1\n",
|
||||||
|
" _MODEL = \"gpt-4o-mini\"\n",
|
||||||
|
"\n",
|
||||||
|
" # week1 -> day1\n",
|
||||||
|
" def __init__(self, model: str = _MODEL) -> None:\n",
|
||||||
|
" self.openai_client = OpenAI()\n",
|
||||||
|
" self.driver = webdriver.Chrome()\n",
|
||||||
|
" self._MODEL = model\n",
|
||||||
|
" \n",
|
||||||
|
" # week1 -> day1\n",
|
||||||
|
" def scrape_website(self, url: str) -> str:\n",
|
||||||
|
" self.driver.get(url)\n",
|
||||||
|
" self.driver.implicitly_wait(10)\n",
|
||||||
|
" title = self.driver.title\n",
|
||||||
|
" text_content = self.driver.find_element(By.TAG_NAME, \"body\").text\n",
|
||||||
|
" return title + \"\\n\\n\" + text_content\n",
|
||||||
|
"\n",
|
||||||
|
" # week1 -> day1\n",
|
||||||
|
" def summarize_text(self, url: str) -> str:\n",
|
||||||
|
" text = self.scrape_website(url)\n",
|
||||||
|
" response = self.openai_client.chat.completions.create(\n",
|
||||||
|
" model=self._MODEL,\n",
|
||||||
|
" messages=[\n",
|
||||||
|
" {\"role\": \"system\", \"content\": self._system_prompt},\n",
|
||||||
|
" {\"role\": \"user\", \"content\": text}\n",
|
||||||
|
" ]\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" return response.choices[0].message.content\n",
|
||||||
|
"\n",
|
||||||
|
" # week1 -> day1\n",
|
||||||
|
" def display_summary(self, url: str)-> None:\n",
|
||||||
|
" summary:str = self.summarize_text(url)\n",
|
||||||
|
" display(Markdown(summary))\n",
|
||||||
|
"\n",
|
||||||
|
" # week1 -> day2\n",
|
||||||
|
" def set_endpoint(self, endpoint: str, api_key: str = \"ollama\") -> None:\n",
|
||||||
|
" self.openai_client = OpenAI(base_url=endpoint, api_key=api_key)\n",
|
||||||
|
"\n",
|
||||||
|
" # week1 -> day2\n",
|
||||||
|
" def set_model(self, model: str) -> None:\n",
|
||||||
|
" self._MODEL = model\n",
|
||||||
|
"\n",
|
||||||
|
" # week1 -> day5\n",
|
||||||
|
" def set_system_prompt(self, system_prompt: str) -> None:\n",
|
||||||
|
" self._system_prompt = system_prompt\n",
|
||||||
|
"\n",
|
||||||
|
" # week1 -> day5\n",
|
||||||
|
" def scrape_website_links(self, url: str) -> list[str]:\n",
|
||||||
|
" self.driver.get(url)\n",
|
||||||
|
" self.driver.implicitly_wait(10)\n",
|
||||||
|
" \n",
|
||||||
|
" links = self.driver.find_elements(By.TAG_NAME, \"a\")\n",
|
||||||
|
" return [link.get_attribute(\"href\") for link in links \n",
|
||||||
|
" if link.get_attribute(\"href\") and link.get_attribute(\"href\").strip()]\n",
|
||||||
|
"\n",
|
||||||
|
" # week1 -> day5\n",
|
||||||
|
" def generate_user_prompt_to_select_relevant_links(self, url: str) -> str:\n",
|
||||||
|
" user_prompt = f\"\"\"\n",
|
||||||
|
" Here is the list of links on the website {url} -\n",
|
||||||
|
" Please decide which of these are relevant web links for a brochure about the company, \n",
|
||||||
|
" respond with the full https URL in JSON format.\n",
|
||||||
|
" Do not include Terms of Service, Privacy, email links.\n",
|
||||||
|
"\n",
|
||||||
|
" Links (some might be relative links):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" links = self.scrape_website_links(url)\n",
|
||||||
|
" user_prompt += \"\\n\".join(links)\n",
|
||||||
|
" return user_prompt\n",
|
||||||
|
"\n",
|
||||||
|
" # week1 -> day5\n",
|
||||||
|
" def select_relevant_links(self, url:str) -> Dict[str, List[Dict[str, str]]]:\n",
|
||||||
|
" message = self.generate_user_prompt_to_select_relevant_links(url)\n",
|
||||||
|
" response = self.openai_client.chat.completions.create(\n",
|
||||||
|
" model=self._MODEL,\n",
|
||||||
|
" messages=[\n",
|
||||||
|
" {\"role\": \"system\", \"content\": self._system_prompt},\n",
|
||||||
|
" {\"role\": \"user\", \"content\": message}\n",
|
||||||
|
" ],\n",
|
||||||
|
" response_format={\"type\": \"json_object\"}\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" json_response = json.loads(response.choices[0].message.content)\n",
|
||||||
|
"\n",
|
||||||
|
" return json_response\n",
|
||||||
|
"\n",
|
||||||
|
" # week1 -> day5\n",
|
||||||
|
" def fetch_page_and_all_relevant_links(self, url):\n",
|
||||||
|
" contents = self.scrape_website(url)\n",
|
||||||
|
" relevant_links = self.select_relevant_links(url)\n",
|
||||||
|
" result = f\"## Landing Page:\\n\\n{contents}\\n## Relevant Links:\\n\"\n",
|
||||||
|
" for link in relevant_links[\"links\"]:\n",
|
||||||
|
"            result += f\"\\n\\n### Link: {link['type']}\\n\"\n",
|
||||||
|
" result += self.scrape_website(link[\"url\"])\n",
|
||||||
|
" return result\n",
|
||||||
|
" \n",
|
||||||
|
" def get_user_prompt_for_brochure(self, company_name:str, url:str) -> str:\n",
|
||||||
|
" user_prompt = f\"\"\"\n",
|
||||||
|
" You are looking at a company called: {company_name}\n",
|
||||||
|
" Here are the contents of its landing page and other relevant pages;\n",
|
||||||
|
" use this information to build a short brochure of the company in markdown without code blocks.\\n\\n\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" user_prompt += self.fetch_page_and_all_relevant_links(url)\n",
|
||||||
|
" user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
|
||||||
|
" return user_prompt\n",
|
||||||
|
"\n",
|
||||||
|
" # week1 -> day5\n",
|
||||||
|
" def generate_brochure(self, company_name:str, url:str, link_prompt: str, brochure_prompt: str, stream: bool = False) -> None:\n",
|
||||||
|
" self.set_system_prompt(link_prompt)\n",
|
||||||
|
" contents = self.get_user_prompt_for_brochure(company_name,url)\n",
|
||||||
|
" self.set_system_prompt(brochure_prompt)\n",
|
||||||
|
" response = self.openai_client.chat.completions.create(\n",
|
||||||
|
" model=self._MODEL,\n",
|
||||||
|
" messages=[{\"role\": \"system\", \"content\": self._system_prompt}, {\"role\": \"user\", \"content\": contents}],\n",
|
||||||
|
" stream=stream # for streaming response\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" if stream:\n",
|
||||||
|
" full_response = \"\"\n",
|
||||||
|
" display_handle = display(Markdown(full_response), display_id=True)\n",
|
||||||
|
" for chunk in response:\n",
|
||||||
|
" full_response += chunk.choices[0].delta.content or \"\"\n",
|
||||||
|
" update_display(Markdown(full_response), display_id=display_handle.display_id)\n",
|
||||||
|
" else:\n",
|
||||||
|
" result = response.choices[0].message.content\n",
|
||||||
|
" display(Markdown(result))\n",
|
||||||
|
"\n",
|
||||||
|
" # week2 -> day2\n",
|
||||||
|
" def generate_brochure_with_gradio(self, company_name:str, url:str, link_prompt: str, brochure_prompt: str, stream: bool = False):\n",
|
||||||
|
" self.set_system_prompt(link_prompt)\n",
|
||||||
|
" contents = self.get_user_prompt_for_brochure(company_name,url)\n",
|
||||||
|
" self.set_system_prompt(brochure_prompt)\n",
|
||||||
|
" response = self.openai_client.chat.completions.create(\n",
|
||||||
|
" model=self._MODEL,\n",
|
||||||
|
" messages=[{\"role\": \"system\", \"content\": self._system_prompt}, {\"role\": \"user\", \"content\": contents}],\n",
|
||||||
|
" stream=stream # for streaming response\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" if stream:\n",
|
||||||
|
" full_response = \"\"\n",
|
||||||
|
" for chunk in response:\n",
|
||||||
|
" full_response += chunk.choices[0].delta.content or \"\"\n",
|
||||||
|
" yield full_response\n",
|
||||||
|
" else:\n",
|
||||||
|
" result = response.choices[0].message.content\n",
|
||||||
|
" yield result\n",
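"\n",
"    # Added suggestion (not part of the original class): Selenium keeps the Chrome window open,\n",
"    # so release it when you are finished, e.g. Summarizer.close() or Summarizer.driver.quit().\n",
"    def close(self) -> None:\n",
"        self.driver.quit()\n",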
|
||||||
|
" "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "cc085a2b",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Demo: LinkedIn Summary\n",
|
||||||
|
"\n",
|
||||||
|
"This cell demonstrates the WebpageSummarizer in action by:\n",
|
||||||
|
"\n",
|
||||||
|
"1. **Creating an instance** with the GPT-5-nano model\n",
|
||||||
|
"2. **Scraping LinkedIn's homepage** - a JavaScript-heavy site that traditional scraping can't handle\n",
|
||||||
|
"3. **Generating a snarky summary** that captures the essence of LinkedIn's professional networking platform\n",
|
||||||
|
"\n",
|
||||||
|
"### What Happens:\n",
|
||||||
|
"- Selenium opens Chrome browser (visible window)\n",
|
||||||
|
"- Navigates to LinkedIn.com\n",
|
||||||
|
"- Waits for JavaScript to render all content\n",
|
||||||
|
"- Extracts all visible text from the page\n",
|
||||||
|
"- Sends content to OpenAI for summarization\n",
|
||||||
|
"- Displays the humorous, sarcastic summary in markdown format\n",
|
||||||
|
"\n",
|
||||||
|
"### Expected Output:\n",
|
||||||
|
"A witty, entertaining summary that captures LinkedIn's key features and business model with a humorous tone.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "cfe93bea",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Error sending stats to Plausible: error sending request for url (https://plausible.io/api/event)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/markdown": [
|
||||||
|
"LinkedIn’s homepage in a nutshell: a corporate buffet of jobs, courses, tools, and guilt-inducing “Open to Work” vibes, wrapped in a lot of navigation clutter.\n",
|
||||||
|
"\n",
|
||||||
|
"- Top Content: Curated posts and expert insights by topic (Career, Productivity, Finance, Soft Skills, Project Management, etc.). Yes, because your feed needed more buzzwords.\n",
|
||||||
|
"- Jobs: Find the right job or internship across a big menu of roles (Engineering, Marketing, IT, HR, Admin, Retail, etc.). Tempting you with endless openings.\n",
|
||||||
|
"- Post your job: Post a job for millions to see. Because nothing says “we’re hiring” like a public billboard.\n",
|
||||||
|
"- Software tools: Discover the best software—CRM, HRMS, Project Management, Help Desk, etc.—as if you were deciding which inbox to dread today.\n",
|
||||||
|
"- Games: Keep your mind sharp with daily games (Pinpoint, Queens, Crossclimb, Tango, Zip, Mini Sudoku). Productivity through micro-snacks!\n",
|
||||||
|
"- Open To Work: Privately tell recruiters or publicly broadcast you’re looking for opportunities. Subtle as a neon sign.\n",
|
||||||
|
"- Connect and Learn: Find people you know, learn new skills, and choose topics to study. Professional life, now with more onboarding prompts.\n",
|
||||||
|
"- Who is LinkedIn for?: Anyone navigating professional life—because apparently that’s everyone.\n",
|
||||||
|
"- Bottom line: It’s a hub of professional action—job hunting, learning, toolshopping, and the occasional brain teaser to distract you from the grim reality of deadlines."
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.Markdown object>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# week1 -> day1\n",
|
||||||
|
"Summarizer = WebpageSummarizer(\"gpt-5-nano\")\n",
|
||||||
|
"\n",
|
||||||
|
"Summarizer.display_summary(\"https://www.linkedin.com\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "4816a966",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 🔄 **Day 2 - Remote ↔ Local AI**\n",
|
||||||
|
"\n",
|
||||||
|
"Seamless switching between OpenAI (cloud) and Ollama (local) using `set_endpoint()`\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "7b650e50",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 🚀 **Day 5 - Multi-Page Intelligence**\n",
|
||||||
|
"\n",
|
||||||
|
"AI-powered link analysis + automated company brochure generation\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "d586747e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# week1 -> day2\n",
|
||||||
|
"Summarizer.set_endpoint(\"http://localhost:11434/v1\")\n",
|
||||||
|
"Summarizer.set_model(\"llama3.2\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "43331574",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"Summarizer.display_summary(\"https://www.linkedin.com\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"id": "e4e90b4a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"Summarizer = WebpageSummarizer(\"gpt-5-nano\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 13,
|
||||||
|
"id": "fb6b2d25",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"LINK_SYSTEM_PROMPT = \"\"\"\n",
|
||||||
|
" You are provided with a list of links found on a webpage.\n",
|
||||||
|
" You are able to decide which of the links would be most relevant to include in a brochure about the company,\n",
|
||||||
|
" such as links to an About page, or a Company page, or Careers/Jobs pages.\n",
|
||||||
|
" You should respond in JSON as in this example:\n",
|
||||||
|
"\n",
|
||||||
|
" {\n",
|
||||||
|
" \"links\": [\n",
|
||||||
|
" {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
|
||||||
|
" {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
|
||||||
|
" ]\n",
|
||||||
|
" }\n",
|
||||||
|
" \"\"\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 14,
|
||||||
|
"id": "da54f8ca",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"BRAND_SYSTEM_PROMPT = \"\"\" \n",
|
||||||
|
"You are an assistant that analyzes the contents of several relevant pages from a company website\n",
|
||||||
|
"and creates a short brochure about the company for prospective customers, investors and recruits.\n",
|
||||||
|
"Respond in markdown without code blocks.\n",
|
||||||
|
"Include details of company culture, customers and careers/jobs if you have the information. \n",
|
||||||
|
"\"\"\"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"id": "b6055ce5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/markdown": [
|
||||||
|
"# Hugging Face — The AI community building the future\n",
|
||||||
|
"\n",
|
||||||
|
"Hugging Face is the collaboration platform at the heart of the machine learning community. We empower researchers, engineers, and end users to learn, share, and build open, ethical AI together.\n",
|
||||||
|
"\n",
|
||||||
|
"---\n",
|
||||||
|
"\n",
|
||||||
|
"## What we do\n",
|
||||||
|
"\n",
|
||||||
|
"- A vibrant platform where the ML community collaborates on models, datasets, and applications\n",
|
||||||
|
"- Browse 1M+ models, discover 400k+ apps, and explore 250k+ datasets\n",
|
||||||
|
"- Multi-modality support: text, image, video, audio, and even 3D\n",
|
||||||
|
"- Build and showcase your ML portfolio by sharing your work with the world\n",
|
||||||
|
"- Sign up to join a thriving ecosystem and accelerate your ML journey\n",
|
||||||
|
"\n",
|
||||||
|
"---\n",
|
||||||
|
"\n",
|
||||||
|
"## The platform (products and capabilities)\n",
|
||||||
|
"\n",
|
||||||
|
"- Hub for Models, Datasets, and Spaces\n",
|
||||||
|
" - Host and collaborate on unlimited public models, datasets, and applications\n",
|
||||||
|
"- HF Open Source Stack\n",
|
||||||
|
" - Move faster with a comprehensive open source foundation\n",
|
||||||
|
"- Inference & Deployment\n",
|
||||||
|
" - Inference Endpoints to deploy at scale; GPU-enabled Spaces in a few clicks\n",
|
||||||
|
" - Inference Providers give access to 45,000+ models via a single unified API (no service fees)\n",
|
||||||
|
"- HuggingChat Omni\n",
|
||||||
|
" - Chat with AI across the ecosystem\n",
|
||||||
|
"- Services for teams\n",
|
||||||
|
" - Enterprise-grade security, access controls, and dedicated support\n",
|
||||||
|
" - Starting at $20 per user per month\n",
|
||||||
|
"- Compute options\n",
|
||||||
|
" - Starting at $0.60/hour for GPU\n",
|
||||||
|
"- Open ecosystem\n",
|
||||||
|
" - Our open source projects power the ML toolchain and community\n",
|
||||||
|
" - Key projects include Transformers, Diffusers, Safetensors, Tokenizers, TRL, Transformers.js, smolagents, and more\n",
|
||||||
|
"\n",
|
||||||
|
"---\n",
|
||||||
|
"\n",
|
||||||
|
"## Our open source core\n",
|
||||||
|
"\n",
|
||||||
|
"We’re building the foundation of ML tooling with the community. Our flagship projects include:\n",
|
||||||
|
"- Transformers (state-of-the-art models for PyTorch)\n",
|
||||||
|
"- Diffusers (diffusion models)\n",
|
||||||
|
"- Safetensors (safe storage/distribution of weights)\n",
|
||||||
|
"- Hub Python Library (Python client for the Hugging Face Hub)\n",
|
||||||
|
"- Tokenizers, TRL, Transformers.js, smolagents\n",
|
||||||
|
"- These projects power the vast Hugging Face ecosystem and enable researchers and developers to innovate openly\n",
|
||||||
|
"\n",
|
||||||
|
"---\n",
|
||||||
|
"\n",
|
||||||
|
"## Customers, partners, and impact\n",
|
||||||
|
"\n",
|
||||||
|
"- More than 50,000 organizations use Hugging Face\n",
|
||||||
|
"- Notable teams and enterprises rely on our platform, including leaders such as Meta AI, Amazon, Google, Microsoft, Intel, Grammarly, Writer, and more\n",
|
||||||
|
"- We support both individual researchers and large teams with scalable, secure solutions\n",
|
||||||
|
"\n",
|
||||||
|
"---\n",
|
||||||
|
"\n",
|
||||||
|
"## Culture, community, and values\n",
|
||||||
|
"\n",
|
||||||
|
"- Open and ethical AI future, built together with the community\n",
|
||||||
|
"- A learning-first, collaborative environment that values openness and sharing\n",
|
||||||
|
"- Strong emphasis on open source tooling and transparent collaboration\n",
|
||||||
|
"- A platform that empowers the next generation of ML engineers, scientists, and end users\n",
|
||||||
|
"\n",
|
||||||
|
"From brand storytelling to product strategy, we emphasize a cooperative, community-driven approach to advancing AI in a responsible way.\n",
|
||||||
|
"\n",
|
||||||
|
"---\n",
|
||||||
|
"\n",
|
||||||
|
"## Careers and how to join\n",
|
||||||
|
"\n",
|
||||||
|
"- We regularly post opportunities on our Careers page. If you’re excited by open science, open source tooling, and building tools that empower thousands of practitioners, Hugging Face could be a great fit.\n",
|
||||||
|
"- Join a growing, mission-driven team that supports developers, researchers, and enterprise customers with cutting-edge AI tooling\n",
|
||||||
|
"\n",
|
||||||
|
"---\n",
|
||||||
|
"\n",
|
||||||
|
"## How to engage\n",
|
||||||
|
"\n",
|
||||||
|
"- Explore Models, Datasets, and Spaces\n",
|
||||||
|
"- Try HuggingChat Omni\n",
|
||||||
|
"- Sign up to build your ML portfolio and collaborate with the community\n",
|
||||||
|
"- For teams, learn about our enterprise options, security, and dedicated support\n",
|
||||||
|
"\n",
|
||||||
|
"---\n",
|
||||||
|
"\n",
|
||||||
|
"## Why invest or partner with Hugging Face\n",
|
||||||
|
"\n",
|
||||||
|
"- A thriving, open-source ecosystem with broad adoption across industry and academia\n",
|
||||||
|
"- A scalable platform that combines models, datasets, spaces, and applications under one roof\n",
|
||||||
|
"- A proven track record of enabling organizations to accelerate AI development while offering enterprise-grade security and support\n",
|
||||||
|
"- A growing customer base and a clear pathway from community tools to enterprise deployment\n",
|
||||||
|
"\n",
|
||||||
|
"---\n",
|
||||||
|
"\n",
|
||||||
|
"If you’d like more detail on specific products, a few success stories, or to see current open roles, I can pull together a concise section tailored to investors, customers, or prospective hires."
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.Markdown object>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Generate brochure without streaming the response\n",
|
||||||
|
"Summarizer.generate_brochure(\"Hugging Face\", \"https://huggingface.co\", LINK_SYSTEM_PROMPT, BRAND_SYSTEM_PROMPT)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 17,
|
||||||
|
"id": "ff5a5341",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/markdown": [
|
||||||
|
"# Edward (Ed) Donner — Co-founder & CTO, Nebula.io\n",
|
||||||
|
"\n",
|
||||||
|
"A glimpse into the mission, technology, and culture behind Nebula.io, led by Ed Donner, with a focus on transforming recruitment through AI.\n",
|
||||||
|
"\n",
|
||||||
|
"## Who we are\n",
|
||||||
|
"- Edward (Ed) Donner is the co-founder and CTO of Nebula.io.\n",
|
||||||
|
"- Nebula.io applies Generative AI and other machine learning to help recruiters source, understand, engage, and manage talent.\n",
|
||||||
|
"- The platform uses a patented matching model that connects people with roles more accurately and quickly—without relying on keywords.\n",
|
||||||
|
"\n",
|
||||||
|
"## What we do\n",
|
||||||
|
"- Enable recruiters to source, understand, engage, and manage talent at scale.\n",
|
||||||
|
"- Use proprietary, verticalized LLMs tailored for talent and hiring workflows.\n",
|
||||||
|
"- Offer a patented matching model that improves accuracy and speed, with no keyword tyranny.\n",
|
||||||
|
"- Provide a platform that is award-winning and backed by press coverage, designed to help people discover roles where they will thrive.\n",
|
||||||
|
"- The product is described as free to try, offering a no-barrier way to explore its capabilities.\n",
|
||||||
|
"\n",
|
||||||
|
"## Our technology and approach\n",
|
||||||
|
"- Proprietary LLMs specialized for talent recruitment.\n",
|
||||||
|
"- A patented matching engine that aligns people with roles more effectively than traditional keyword-based methods.\n",
|
||||||
|
"- Emphasis on real-world impact: applying AI to help people discover their potential and pursue their Ikigai—finding roles where they can be fulfilled and successful.\n",
|
||||||
|
"- The platform supports Gen AI and Agentic AI use cases, including practical deployments at scale (evidenced by references to AWS-scale implementations).\n",
|
||||||
|
"\n",
|
||||||
|
"## Why Nebula.io matters\n",
|
||||||
|
"- Addressing a broad human capital challenge: many people feel uninspired or disengaged at work, and Nebula.io aims to change that by better matching individuals to meaningful roles.\n",
|
||||||
|
"- The long-term vision centers on raising human prosperity by helping people pursue fulfilling career paths.\n",
|
||||||
|
"\n",
|
||||||
|
"## History, credibility, and impact\n",
|
||||||
|
"- Origin: Nebula.io traces back to Ed’s prior venture, untapt (founded in 2013), which built talent marketplaces and data science tools for recruitment.\n",
|
||||||
|
"- Early recognition: selected for the Accenture FinTech Innovation Lab; named an American Banker Top 20 Company To Watch.\n",
|
||||||
|
"- Media coverage: features in Fast Company, Forbes, and American Banker; Ed has spoken publicly about AI and recruitment, including high-profile interviews.\n",
|
||||||
|
"- Legacy of real-world impact: Nebula.io builds on a track record of applying AI to recruitment challenges and delivering value to customers.\n",
|
||||||
|
"\n",
|
||||||
|
"## Culture and values\n",
|
||||||
|
"- Ikigai-driven philosophy: helping people discover their potential and pursue meaningful work.\n",
|
||||||
|
"- A hands-on, creative founder who blends technical rigor with curiosity (Ed’s interests include coding, experimenting with LLMs, DJing, and exploring tech culture).\n",
|
||||||
|
"- A pragmatic, impact-focused approach to AI—prioritizing real-world problems and measurable outcomes for customers and candidates alike.\n",
|
||||||
|
"\n",
|
||||||
|
"## Customers and impact\n",
|
||||||
|
"- The platform is used by recruiters today to source, understand, engage, and manage talent.\n",
|
||||||
|
"- The emphasis is on delivering a better, faster, more accurate matching experience—reducing reliance on keyword matching and accelerating hiring outcomes.\n",
|
||||||
|
"- While specific customer names aren’t listed on the public pages, the platform is described as having happy customers and broad press coverage, underscoring credibility and market reception.\n",
|
||||||
|
"\n",
|
||||||
|
"## Careers and opportunities\n",
|
||||||
|
"- The site highlights a culture of innovation and hands-on AI work, but does not list open job postings.\n",
|
||||||
|
"- For those inspired to work at the intersection of AI and talent, Nebula.io invites connections and conversations about opportunities to contribute to real-world hiring problems.\n",
|
||||||
|
"- If you’re interested in joining or collaborating, consider reaching out to Ed Donner and exploring how your skills could fit the mission.\n",
|
||||||
|
"\n",
|
||||||
|
"## How to connect\n",
|
||||||
|
"- Email: ed [at] edwarddonner [dot] com\n",
|
||||||
|
"- Website: www.edwarddonner.com\n",
|
||||||
|
"- Follow Ed on social: LinkedIn, Twitter, Facebook\n",
|
||||||
|
"- Newsletter: Subscribe to updates and course offerings related to AI, LLMs, and talent acquisition\n",
|
||||||
|
"\n",
|
||||||
|
"## Why invest or partner with Nebula.io\n",
|
||||||
|
"- Strong founder-led vision focused on meaningful, measurable outcomes in hiring.\n",
|
||||||
|
"- Proven track record through prior ventures and credible industry recognition.\n",
|
||||||
|
"- Patent-backed technology offering a differentiated approach to talent matching.\n",
|
||||||
|
"- Clear social impact goal: helping people find roles where they will be fulfilled and productive, contributing to broader prosperity.\n",
|
||||||
|
"\n",
|
||||||
|
"If you’d like a tailored brochure version for investors, customers, or potential recruits, I can adjust the emphasis and add any additional details you’d like highlighted."
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.Markdown object>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Generate brochure while streaming the response\n",
|
||||||
|
"Summarizer.generate_brochure(\"Ed Donner\", \"https://edwarddonner.com\", LINK_SYSTEM_PROMPT, BRAND_SYSTEM_PROMPT, stream=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 18,
|
||||||
|
"id": "3f84d4c3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"* Running on local URL: http://127.0.0.1:7862\n",
|
||||||
|
"* To create a public link, set `share=True` in `launch()`.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.HTML object>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": []
|
||||||
|
},
|
||||||
|
"execution_count": 18,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Generate brochure using the Gradio interface\n",
|
||||||
|
"company_name = gr.Textbox(label=\"Company Name\", info=\"Write the name of the company\")\n",
|
||||||
|
"company_url = gr.Textbox(label=\"Company URL\", info=\"Write the URL of the company\")\n",
|
||||||
|
"link_system_prompt = gr.Textbox(\n",
|
||||||
|
" label=\"Link System Prompt\", \n",
|
||||||
|
" info=\"This is a system prompt to decide which of the links would be most relevant to include in a brochure about the company\", \n",
|
||||||
|
" value=LINK_SYSTEM_PROMPT\n",
|
||||||
|
")\n",
|
||||||
|
"brand_system_prompt = gr.Textbox(\n",
|
||||||
|
" label=\"Brand System Prompt\", \n",
|
||||||
|
" info=\"This is a system prompt that analyzes the contents of several relevant pages from a company website and creates a short brochure about the company for prospective customers, investors and recruits.\", \n",
|
||||||
|
" value=BRAND_SYSTEM_PROMPT\n",
|
||||||
|
")\n",
|
||||||
|
"stream_value = gr.Checkbox(label=\"Stream\", value=False)\n",
|
||||||
|
"gr_output = gr.Markdown(label=\"Response\")\n",
|
||||||
|
"\n",
|
||||||
|
"interface = gr.Interface(\n",
|
||||||
|
" fn=Summarizer.generate_brochure_with_gradio, \n",
|
||||||
|
" title=\"Brochure Generator\", \n",
|
||||||
|
" inputs=[company_name, company_url, link_system_prompt, brand_system_prompt, stream_value], \n",
|
||||||
|
" outputs=[gr_output], \n",
|
||||||
|
" flagging_mode=\"never\"\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"interface.launch(inbrowser=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "7114df30",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.12"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||