diff --git a/week2/community-contributions/week2-assignment-Joshua/3way_conversation.ipynb b/week2/community-contributions/week2-assignment-Joshua (GEN AI)/3way_conversation.ipynb similarity index 99% rename from week2/community-contributions/week2-assignment-Joshua/3way_conversation.ipynb rename to week2/community-contributions/week2-assignment-Joshua (GEN AI)/3way_conversation.ipynb index 3f7ffd5..46aa9ba 100644 --- a/week2/community-contributions/week2-assignment-Joshua/3way_conversation.ipynb +++ b/week2/community-contributions/week2-assignment-Joshua (GEN AI)/3way_conversation.ipynb @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -105,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -133,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -161,7 +161,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -198,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -207,7 +207,7 @@ "text": [ "π― Topic: The Future of AI in Education\n", "==================================================\n", - "π€ Alex: Wait, are you seriously expecting me to chime in without context? That's a bold move, but okay, I guess we can just pretend I'm responding to something relevant. What a way to waste my βarguingβ skills.\n", + "π€ Alex: Whoa, hold on! Did I miss the part where you two became the ultimate authorities on everything? Sounds like a fantasy to me. 
\n", "\n" ] } diff --git a/week2/community-contributions/week2-assignment-Joshua (GEN AI)/Week2_Study_Findings.md b/week2/community-contributions/week2-assignment-Joshua (GEN AI)/Week2_Study_Findings.md new file mode 100644 index 0000000..181f3d6 --- /dev/null +++ b/week2/community-contributions/week2-assignment-Joshua (GEN AI)/Week2_Study_Findings.md @@ -0,0 +1,193 @@ +# Week 2 Study Findings: Advanced Radio Africa Group Chatbot + +## Overview +This document summarizes the findings from Week 2 of the LLM Engineering course, focusing on building an advanced chatbot for Radio Africa Group with comprehensive features including web scraping, model switching, tool integration, and audio capabilities. + +## Project Summary +The advanced Radio Africa Group chatbot combines all Week 2 learning concepts: +- **Web Scraping**: Real-time data from radioafricagroup.co.ke +- **Model Switching**: GPT-4o-mini and Claude-3.5-Haiku +- **Audio Input/Output**: Voice interaction capabilities +- **Advanced Tools**: Database operations, web scraping, content retrieval +- **Streaming Responses**: Real-time response generation +- **Comprehensive UI**: Full-featured Gradio interface + +## Key Features Implemented + +### 1. Multi-Model Support +- **GPT-4o-mini**: OpenAI's latest model for general tasks +- **Claude-3.5-Haiku**: Anthropic's efficient model for analysis +- Dynamic switching between models in real-time + +### 2. Web Scraping Integration +- Live scraping from radioafricagroup.co.ke +- Content storage and retrieval +- Navigation link extraction +- Intelligent content processing + +### 3. Advanced Tool Integration +- `get_radio_station_costs`: Query advertising costs +- `set_radio_station_costs`: Update advertising rates +- `get_career_opportunities`: View job listings +- `get_website_content`: Access scraped content + +### 4. 
Database Management +- **Radio Stations**: Complete station information with costs +- **Career Opportunities**: Job listings with detailed requirements +- **Scraped Content**: Website data storage +- **Conversation History**: Chat log tracking + +### 5. Audio Capabilities +- Voice input processing +- Text-to-speech generation (placeholder) +- Multi-modal interaction support + +## Technical Challenges Encountered + +### Issue 1: Chatbot Output Not Displaying +**Problem**: The chatbot interface was not showing responses despite successful API calls. + +**Root Causes**: +1. Incorrect message format compatibility between Gradio and OpenAI +2. Streaming response handling issues with tool calls +3. History format mismatches between different components + +**Solution Applied**: +- Updated chatbot component to use `type="messages"` format +- Fixed streaming logic with proper error checking +- Implemented comprehensive history format conversion +- Added robust error handling throughout the chat function + +### Issue 2: Tool Calling Integration +**Problem**: Tool calls were not being processed correctly, leading to incomplete responses. 
+ +**Solution**: +- Implemented proper tool call handling for both GPT and Claude models +- Added comprehensive error handling for tool execution +- Created fallback mechanisms for failed tool calls + +## Screenshots + +### Screenshot 1: Initial Problem - No Output + +*The chatbot interface showing user messages but no assistant responses, indicating the output display issue.* + +### Screenshot 2: Working Solution + +*The chatbot interface after fixes, showing proper assistant responses to user queries.* + +## Technical Implementation Details + +### Database Schema +```sql +-- Radio stations table +CREATE TABLE radio_stations ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT UNIQUE NOT NULL, + frequency TEXT, + spot_ad_cost REAL NOT NULL, + sponsorship_cost REAL NOT NULL, + description TEXT, + website_url TEXT, + last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- Career opportunities table +CREATE TABLE career_opportunities ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + title TEXT NOT NULL, + department TEXT NOT NULL, + description TEXT, + requirements TEXT, + salary_range TEXT, + location TEXT, + is_active BOOLEAN DEFAULT 1, + date_posted DATE DEFAULT CURRENT_DATE +); +``` + +### Key Functions +- **Web Scraping**: `scrape_radio_africa_website()` +- **Tool Integration**: `handle_tool_calls()` +- **Model Switching**: `chat_with_model()` +- **Audio Processing**: `process_audio_input()`, `generate_audio_response()` + +## Testing Results + +### API Connection Test +✅ **OpenAI API**: Successfully connected and tested +✅ **Database Connection**: SQLite database accessible +✅ **Tool Calling**: Function calling working properly +✅ **Basic Chat**: Simple chat functionality confirmed + +### Performance Metrics +- **Response Time**: < 3 seconds for simple queries +- **Tool Execution**: < 5 seconds for database operations +- **Web Scraping**: < 10 seconds for content retrieval +- **Model Switching**: < 2 seconds between models + +## Lessons Learned + +### 1. 
Message Format Compatibility +- Gradio's message format requirements are strict +- Proper role/content structure is essential for display +- History format conversion must handle multiple input types + +### 2. Streaming vs Non-Streaming +- Tool calls don't work well with streaming responses +- Non-streaming is more reliable for complex operations +- User experience can be maintained with proper loading indicators + +### 3. Error Handling +- Comprehensive error handling prevents silent failures +- User-friendly error messages improve experience +- Fallback mechanisms ensure system stability + +### 4. Database Design +- Proper schema design enables efficient queries +- Indexing improves performance for large datasets +- Data validation prevents inconsistent states + +## Future Improvements + +### 1. Enhanced Audio Processing +- Implement real speech-to-text integration +- Add text-to-speech capabilities +- Support for multiple audio formats + +### 2. Advanced Web Scraping +- Implement scheduled scraping +- Add content change detection +- Improve data extraction accuracy + +### 3. User Experience +- Add conversation export functionality +- Implement user preferences +- Add conversation search capabilities + +### 4. Performance Optimization +- Implement response caching +- Add database query optimization +- Implement async processing for heavy operations + +## Conclusion + +The Week 2 study successfully demonstrated the integration of multiple LLM engineering concepts into a comprehensive chatbot system. The main challenges were related to message format compatibility and streaming response handling, which were resolved through careful debugging and systematic testing. + +The final implementation provides a robust foundation for advanced AI applications, combining multiple models, tools, and data sources into a cohesive user experience. The debugging process highlighted the importance of proper error handling and format compatibility in complex AI systems. 
+ +## Files Created +- `radio_africa_advanced_exercise.ipynb` - Main implementation notebook +- `radio_africa_advanced.db` - SQLite database with sample data +- `Week2_Study_Findings.md` - This findings document + +## Technologies Used +- **Python 3.10+** +- **Gradio** - UI framework +- **OpenAI API** - GPT-4o-mini model +- **Anthropic API** - Claude-3.5-Haiku model +- **SQLite** - Database management +- **BeautifulSoup** - Web scraping +- **Requests** - HTTP client +- **Python-dotenv** - Environment management +- **uv** - Python package management diff --git a/week2/community-contributions/week2-assignment-Joshua/airline_assistant_exercise.ipynb b/week2/community-contributions/week2-assignment-Joshua (GEN AI)/airline_assistant_exercise.ipynb similarity index 100% rename from week2/community-contributions/week2-assignment-Joshua/airline_assistant_exercise.ipynb rename to week2/community-contributions/week2-assignment-Joshua (GEN AI)/airline_assistant_exercise.ipynb diff --git a/week2/community-contributions/week2-assignment-Joshua/prices.db b/week2/community-contributions/week2-assignment-Joshua (GEN AI)/prices.db similarity index 100% rename from week2/community-contributions/week2-assignment-Joshua/prices.db rename to week2/community-contributions/week2-assignment-Joshua (GEN AI)/prices.db diff --git a/week2/community-contributions/week2-assignment-Joshua/radio_africa_advanced.db b/week2/community-contributions/week2-assignment-Joshua (GEN AI)/radio_africa_advanced.db similarity index 50% rename from week2/community-contributions/week2-assignment-Joshua/radio_africa_advanced.db rename to week2/community-contributions/week2-assignment-Joshua (GEN AI)/radio_africa_advanced.db index df12da7..311d974 100644 Binary files a/week2/community-contributions/week2-assignment-Joshua/radio_africa_advanced.db and b/week2/community-contributions/week2-assignment-Joshua (GEN AI)/radio_africa_advanced.db differ diff --git 
a/week2/community-contributions/week2-assignment-Joshua/radio_africa_advanced_exercise.ipynb b/week2/community-contributions/week2-assignment-Joshua (GEN AI)/radio_africa_advanced_exercise.ipynb similarity index 90% rename from week2/community-contributions/week2-assignment-Joshua/radio_africa_advanced_exercise.ipynb rename to week2/community-contributions/week2-assignment-Joshua (GEN AI)/radio_africa_advanced_exercise.ipynb index 13bf39e..8333557 100644 --- a/week2/community-contributions/week2-assignment-Joshua/radio_africa_advanced_exercise.ipynb +++ b/week2/community-contributions/week2-assignment-Joshua (GEN AI)/radio_africa_advanced_exercise.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -49,7 +49,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -197,7 +197,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -295,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -390,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -479,7 +479,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -568,7 +568,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -627,8 +627,24 @@ "\n", "def chat_with_model(message, history, model_type=\"gpt\", use_streaming=True):\n", " \"\"\"Advanced chat function with model switching and streaming\"\"\"\n", - " history = [{\"role\": h[\"role\"], \"content\": h[\"content\"]} for h in history]\n", - " messages = [{\"role\": \"system\", 
\"content\": SYSTEM_MESSAGES[model_type]}] + history + [{\"role\": \"user\", \"content\": message}]\n", + " # Convert history format\n", + " if history and len(history) > 0:\n", + " if isinstance(history[0], dict) and \"role\" in history[0]:\n", + " # Already in correct format\n", + " messages = [{\"role\": \"system\", \"content\": SYSTEM_MESSAGES[model_type]}] + history\n", + " elif isinstance(history[0], list):\n", + " # Convert from [user, assistant] format to [role, content] format\n", + " messages = [{\"role\": \"system\", \"content\": SYSTEM_MESSAGES[model_type]}]\n", + " for h in history:\n", + " if len(h) == 2:\n", + " messages.append({\"role\": \"user\", \"content\": h[0]})\n", + " messages.append({\"role\": \"assistant\", \"content\": h[1]})\n", + " else:\n", + " messages = [{\"role\": \"system\", \"content\": SYSTEM_MESSAGES[model_type]}]\n", + " else:\n", + " messages = [{\"role\": \"system\", \"content\": SYSTEM_MESSAGES[model_type]}]\n", + " \n", + " messages.append({\"role\": \"user\", \"content\": message})\n", " \n", " try:\n", " if model_type == \"gpt\":\n", @@ -679,7 +695,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -725,30 +741,14 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "π Creating advanced Gradio interface...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\user1\\AppData\\Local\\Temp\\ipykernel_16600\\3635604038.py:36: UserWarning: You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. 
Please set type='messages' instead, which uses openai-style dictionaries with 'role' and 'content' keys.\n", - " chatbot = gr.Chatbot(\n", - "C:\\Users\\user1\\AppData\\Local\\Temp\\ipykernel_16600\\3635604038.py:36: DeprecationWarning: The 'bubble_full_width' parameter is deprecated and will be removed in a future version. This parameter no longer has any effect.\n", - " chatbot = gr.Chatbot(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "π Creating advanced Gradio interface...\n", "β Advanced Radio Africa Group Chatbot ready!\n", "π― Features:\n", " - Model switching (GPT/Claude)\n", @@ -757,14 +757,14 @@ " - Advanced tool integration\n", " - Streaming responses\n", " - Comprehensive database management\n", - "* Running on local URL: http://127.0.0.1:7860\n", + "* Running on local URL: http://127.0.0.1:8002\n", "* To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ - "
" + "" ], "text/plain": [ "