diff --git a/week2/community-contributions/muawiya/README.md b/week2/community-contributions/muawiya/README.md new file mode 100644 index 0000000..1f0dcbf --- /dev/null +++ b/week2/community-contributions/muawiya/README.md @@ -0,0 +1,162 @@ +# Airline AI Assistant + +A sophisticated AI-powered airline assistant that leverages agent-based architecture and multi-modal capabilities to provide comprehensive customer support. This project combines multiple AI technologies, including language models, vision models, and audio processing, to create an intelligent assistant capable of handling complex customer queries through various interaction modes. + +## Features + +### Agent-Based Architecture +- **Multi-Agent System**: Utilizes specialized agents for different tasks: + - Chat Agent: Handles conversation flow and context management + - Translation Agent: Manages multilingual support with focus on Arabic + - Vision Agent: Generates and processes visual responses + - Audio Agent: Handles voice input and speech processing +- **Tool Integration**: Each agent has access to specialized tools: + - Text Generation Tools (Ollama) + - Translation Tools (Ollama) + - Image Generation Tools (DALL-E) + - Audio Processing Tools (Whisper) +- **Context Management**: Agents maintain conversation history and context for coherent interactions + +### Multi-Modal Capabilities +- **Text Processing**: + - Natural language understanding + - Context-aware responses + - Multi-language support +- **Visual Processing**: + - Image generation based on context + - Visual response to queries + - Image-to-text understanding +- **Audio Processing**: + - Voice-to-text conversion + - Multi-format audio support + - Real-time audio processing + +### Core Features +- **Intelligent Chat Interface**: Context-aware conversations with memory +- **Arabic Translation**: Advanced translation capabilities with context preservation +- **Voice Interaction**: Natural voice input and processing +- **Visual Response Generation**: Contextual image generation +- **Multi-Tool Integration**: Seamless coordination between different AI tools + +## Technical Architecture + +### Agent System + +
+```
+┌───────────────┐      ┌───────────────┐      ┌───────────────┐
+│  Chat Agent   │<────>│ Vision Agent  │<────>│  Audio Agent  │
+└───────┬───────┘      └───────┬───────┘      └───────┬───────┘
+        │                      │                      │
+        ▼                      ▼                      ▼
+┌───────────────┐      ┌───────────────┐      ┌───────────────┐
+│ Translation   │      │ Image         │      │ Audio         │
+│ Tools         │      │ Generation    │      │ Processing    │
+│ (Ollama)      │      │ (DALL-E)      │      │ (Whisper)     │
+└───────────────┘      └───────────────┘      └───────────────┘
+``` 
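+
+In practice, the Chat Agent is the coordinator: it sends the conversation to the language model together with the tool schemas, and routes any tool call the model makes to the matching Python function. A minimal sketch of that dispatch step (modelled on `handle_tool_call` in `app.py`; the `dispatch_tool_call` name is illustrative, while the tool functions come from `tools.py`):
+
+```python
+import json
+
+from tools import get_ticket_price, make_a_booking
+
+
+def dispatch_tool_call(tool_call):
+    """Route a model-issued tool call to the matching local function."""
+    args = json.loads(tool_call.function.arguments)
+    if tool_call.function.name == "get_ticket_price":
+        result = {"price": get_ticket_price(args["destination_city"])}
+    elif tool_call.function.name == "make_a_booking":
+        result = {"booked": make_a_booking(args["destination_city"],
+                                           args["customer_name"],
+                                           args["customer_id"])}
+    else:
+        raise ValueError(f"Unknown tool: {tool_call.function.name}")
+    # The result is returned to the model as a "tool" message before the final reply
+    return {"role": "tool", "content": json.dumps(result), "tool_call_id": tool_call.id}
+```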
+
+### Multi-Modal Flow
+1. **Input Processing**:
+   - Text input → Chat Agent
+   - Voice input → Audio Agent → Chat Agent
+   - Image input → Vision Agent → Chat Agent
+
+2. **Response Generation**:
+   - Chat Agent coordinates with other agents
+   - Translation Agent processes language needs
+   - Vision Agent generates visual responses
+   - Audio Agent processes voice output
+
+## Prerequisites
+
+Before running the application, ensure you have the following installed:
+
+- Python 3.8 or higher
+- Ollama (for local LLM support)
+- FFmpeg (for audio processing)
+- Required Python packages (listed in requirements.txt)
+
+## Installation
+
+1. Clone the repository:
+```bash
+git clone https://github.com/yourusername/airline_ai_assistant.git
+cd airline_ai_assistant
+```
+
+2. Install the required packages:
+```bash
+pip install -r requirements.txt
+```
+
+3. Set up your environment variables:
+```bash
+# Create a .env file with your API keys
+OPENAI_API_KEY=your_key_here
+```
+
+## Usage
+
+1. Start the application:
+```bash
+python app.py
+```
+
+2. Access the web interface through your browser (default: http://localhost:7860)
+
+3. Interact with the assistant:
+   - Type your message in the text box
+   - Use the microphone for voice input
+   - View responses in both English and Arabic
+   - See visual representations of responses
+
+## Project Structure
+
+```
+airline_ai_assistant/
+├── app.py              # Main application: agent orchestration, Gradio UI, chat, translation, image and audio handling
+├── tools.py            # Tool implementations and their JSON schemas (ticket pricing, booking)
+├── requirements.txt    # Python dependencies
+├── .env                # Environment variables (OPENAI_API_KEY)
+└── README.md           # Project documentation
+``` 
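+
+For reference, the Arabic translation shown alongside each reply is produced by a local Ollama model rather than the OpenAI API. A condensed sketch of that call (adapted from `ollama_translator` in `app.py`; it assumes the `llama3.2:latest` model has already been pulled, e.g. with `ollama pull llama3.2`):
+
+```python
+import ollama
+
+
+def translate(text, target_language="Arabic"):
+    """Translate text with a locally running Ollama model."""
+    prompt = (f"Translate the following text to {target_language}. "
+              f"Only output the translation, nothing else:\n\n{text}")
+    response = ollama.chat(
+        model="llama3.2:latest",
+        messages=[
+            {"role": "system", "content": "You are a professional translator."},
+            {"role": "user", "content": prompt},
+        ],
+    )
+    return response["message"]["content"].strip()
+```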
+ +## Key Components + +### Agent System +- **Chat Agent**: Manages conversation flow and context +- **Translation Agent**: Handles multilingual support +- **Vision Agent**: Processes visual content +- **Audio Agent**: Manages voice interactions + +### Tool Integration +- **Text Tools**: Language model integration +- **Vision Tools**: Image generation and processing +- **Audio Tools**: Voice processing and transcription + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. + +## Acknowledgments + +- OpenAI for the Whisper and DALL-E models +- Ollama for local LLM support +- Gradio for the web interface +- The open-source community for various tools and libraries + +## Contact + +For questions or support, please open an issue in the repository. \ No newline at end of file diff --git a/week2/community-contributions/muawiya/app.py b/week2/community-contributions/muawiya/app.py new file mode 100644 index 0000000..cebca69 --- /dev/null +++ b/week2/community-contributions/muawiya/app.py @@ -0,0 +1,328 @@ +# imports + +import os +import json +from dotenv import load_dotenv +from openai import OpenAI +import gradio as gr +import base64 +from io import BytesIO +from PIL import Image +from IPython.display import Audio, display +import pygame +import time +from tools import price_function, get_ticket_price, make_a_booking, booking_function +import ollama +import anthropic +from anthropic import Anthropic +import whisper +import numpy as np + +# And this is included in a list of tools: + +tools = [{"type": "function", "function": price_function}, {"type": "function", "function": booking_function}] +# tools = [price_function, booking_function] + +# System messages +system_message = "You are a helpful assistant for an Airline called FlightAI. " +system_message += "Give short, courteous answers, no more than 1 sentence. " +system_message += "Always be accurate. If you don't know the answer, say so." 
+
+# Initialization
+
+load_dotenv(override=True)
+
+openai_api_key = os.getenv('OPENAI_API_KEY')
+if openai_api_key:
+    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
+else:
+    print("OpenAI API Key not set")
+
+MODEL = "gpt-4o-mini"
+openai = OpenAI()
+
+
+def chat(history):
+    messages = [{"role": "system", "content": system_message}] + history
+    response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)
+    image = None
+
+    if response.choices[0].finish_reason == "tool_calls":
+        message = response.choices[0].message
+        response, city = handle_tool_call(message)
+        messages.append(message)
+        messages.append(response)
+        if message.tool_calls[0].function.name == "get_ticket_price":
+            # image = artist(city)
+            pass
+        response = openai.chat.completions.create(model=MODEL, messages=messages)
+
+    reply = response.choices[0].message.content
+
+    # ✅ SAFETY CHECK: Never add empty or None replies
+    if reply:
+        history.append({"role": "assistant", "content": str(reply)})
+        talker(reply)
+    else:
+        history.append({"role": "assistant", "content": "Sorry, no response available."})
+
+    return history, image
+
+
+# We have to write that function handle_tool_call:
+
+def handle_tool_call(message):
+    print(f"Handling tool call: {message}")
+    tool_call = message.tool_calls[0]
+    function_name = tool_call.function.name
+    arguments = json.loads(tool_call.function.arguments)
+
+    if function_name == "get_ticket_price":
+        city = arguments.get('destination_city')
+        price = get_ticket_price(city)
+        response = {
+            "role": "tool",
+            "content": json.dumps({"destination_city": city, "price": price}),
+            "tool_call_id": tool_call.id
+        }
+        return response, city
+
+    elif function_name == "make_a_booking":
+        city = arguments.get('destination_city')
+        customer_name = arguments.get('customer_name')
+        customer_id = arguments.get('customer_id')
+        booking_result = make_a_booking(city, customer_name, customer_id)
+        response = {
+            "role": "tool",
+            "content": json.dumps({
+                "destination_city": city,
+                "customer_name": customer_name,
+                "customer_id": customer_id,
+                "booking_result": booking_result
+            }),
+            "tool_call_id": tool_call.id
+        }
+        return response, city
+
+    else:
+        raise ValueError(f"Unknown function: {function_name}")
+
+
+def artist(city):
+    image_response = openai.images.generate(
+        model="dall-e-3",
+        prompt=f"An image representing a vacation in {city}, showing tourist spots and everything unique about {city}, in a vibrant pop-art style",
+        size="1024x1024",
+        n=1,
+        response_format="b64_json",
+    )
+    image_base64 = image_response.data[0].b64_json
+    image_data = base64.b64decode(image_base64)
+    return Image.open(BytesIO(image_data))
+
+
+def talker(message):
+    response = openai.audio.speech.create(
+        model="tts-1",
+        voice="onyx",
+        input=message)
+
+    audio_stream = BytesIO(response.content)
+    output_filename = f"output_audio_{time.time()}.mp3"
+    with open(output_filename, "wb") as f:
+        f.write(audio_stream.read())
+
+    # Play the generated audio
+    # display(Audio(output_filename, autoplay=True))  # This code is suitable for Jupyter
+    print(f"Created audio file at {output_filename}")
+
+    # Using pygame
+    pygame.init()
+    pygame.mixer.init()
+    pygame.mixer.music.load(output_filename)
+    pygame.mixer.music.play()
+    while pygame.mixer.music.get_busy():
+        continue
+
+
+def ollama_translator(text, target_language="Arabic"):
+    """
+    Translates text to the specified language using Ollama.
+ + Args: + text (str): The text to translate + target_language (str): The language to translate to (default: Arabic) + + Returns: + str: The translated text + """ + try: + # Create a prompt that instructs the model to translate + prompt = f"Translate the following text to {target_language}. Only output the translation, nothing else:\n\n{text}" + + response = ollama.chat( + model='llama3.2:latest', # or any other model you have installed + messages=[ + {"role": "system", "content": "You are a professional translator. Translate the given text accurately."}, + {"role": "user", "content": prompt} + ] + ) + + translated_text = response['message']['content'].strip() + return translated_text + + except Exception as e: + print(f"Translation error: {str(e)}") + return f"Translation failed: {str(e)}" + + +def translate_message(history): + """ + Translates the last message in the chat history. + + Args: + history (list): List of chat messages + + Returns: + str: Translated text of the last message + """ + if not history: + return "" + + # Get the last message from history + last_message = history[-1] + + # Extract the content from the last message + message_content = last_message.get('content', '') + + if message_content: + return ollama_translator(message_content) + return "" + + +def clear_chat(): + return [], "" + + +def convert_audio_to_text(audio_file_path): + """ + Converts audio to text using OpenAI's Whisper model. + Supports MP3, WAV, and other common audio formats. + + Args: + audio_file_path (str): Path to the audio file + + Returns: + str: Transcribed text + """ + try: + # Load the Whisper model + model = whisper.load_model("base") + + # Transcribe the audio file + result = model.transcribe(audio_file_path) + + # Return the transcribed text + return result["text"] + + except Exception as e: + print(f"Audio transcription error: {str(e)}") + return f"Transcription failed: {str(e)}" + + +def handle_audio(audio_file, history): + history = history or [] + if audio_file: + try: + if not os.path.exists(audio_file): + raise Exception("Audio file not found") + + try: + transcribed_text = convert_audio_to_text(audio_file) + except Exception as e: + print(f"Transcription error: {str(e)}") + return history, None # 🛠️ match expected outputs + + if transcribed_text: + history.append({"role": "user", "content": str(transcribed_text)}) + + try: + if os.path.exists(audio_file): + os.remove(audio_file) + except Exception as e: + print(f"Warning: Could not delete audio file: {str(e)}") + + return history, None # ✅ return both expected outputs + except Exception as e: + print(f"Error processing audio: {str(e)}") + return history, None + return history, None + + +if __name__ == "__main__": + # gr.ChatInterface(fn=chat, type="messages").launch() + # talker("Hello, how are you?") + # Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately. 
+ + # print(ollama_translator("Hello, how are you?")) + # print(convert_audio_to_text("output_audio_1744898241.4550629.mp3")) + + with gr.Blocks() as ui: + with gr.Row(): + with gr.Column(): + chatbot = gr.Chatbot(height=500, type="messages") + with gr.Row(): + entry = gr.Textbox(label="Chat with our AI Assistant:") + audio_input = gr.Audio( + type="filepath", + label="Or speak your message:", + interactive=True, + format="wav", + # source="microphone" + ) + clear = gr.Button("Clear") + with gr.Column(): + translation_output = gr.Textbox(label="Translation (Arabic):", lines=5) + image_output = gr.Image(height=500) + + + def do_entry(message, history): + history = history or [] + if message: + history.append({"role": "user", "content": str(message)}) + return "", history + + + def translate_message(history): + if not history: + return "" + last_message = history[-1] + message_content = last_message.get('content', '') + if message_content: + return ollama_translator(message_content) + return "" + + + def clear_chat(): + return [], "" + + + # Handle text input + entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot]).then( + chat, inputs=chatbot, outputs=[chatbot, image_output] + ).then( + translate_message, inputs=chatbot, outputs=translation_output + ) + + # Handle audio input + audio_input.stop_recording( + handle_audio, inputs=[audio_input, chatbot], outputs=[chatbot, image_output] + ).then( + chat, inputs=chatbot, outputs=[chatbot, image_output] + ).then( + translate_message, inputs=chatbot, outputs=translation_output + ) + + clear.click(clear_chat, inputs=None, outputs=[chatbot, translation_output]) + + ui.launch(inbrowser=False) diff --git a/week2/community-contributions/muawiya/requirements.txt b/week2/community-contributions/muawiya/requirements.txt new file mode 100644 index 0000000..994091e --- /dev/null +++ b/week2/community-contributions/muawiya/requirements.txt @@ -0,0 +1,72 @@ +aiofiles==24.1.0 +annotated-types==0.7.0 +anyio==4.9.0 +asttokens==3.0.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 +colorama==0.4.6 +decorator==5.2.1 +distro==1.9.0 +executing==2.2.0 +fastapi==0.115.12 +ffmpy==0.5.0 +filelock==3.18.0 +fsspec==2025.3.2 +gradio==5.25.2 +gradio_client==1.8.0 +groovy==0.1.2 +h11==0.14.0 +httpcore==1.0.8 +httpx==0.28.1 +huggingface-hub==0.30.2 +idna==3.10 +ipython==8.35.0 +ipython_pygments_lexers==1.1.1 +jedi==0.19.2 +Jinja2==3.1.6 +jiter==0.9.0 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +matplotlib-inline==0.1.7 +mdurl==0.1.2 +numpy==2.2.4 +openai==1.74.0 +orjson==3.10.16 +packaging==24.2 +pandas==2.2.3 +parso==0.8.4 +pillow==11.2.1 +prompt_toolkit==3.0.51 +pure_eval==0.2.3 +pydantic==2.11.3 +pydantic_core==2.33.1 +pydub==0.25.1 +pygame==2.6.1 +Pygments==2.19.1 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.0 +python-multipart==0.0.20 +pytz==2025.2 +PyYAML==6.0.2 +requests==2.32.3 +rich==14.0.0 +ruff==0.11.5 +safehttpx==0.1.6 +semantic-version==2.10.0 +shellingham==1.5.4 +six==1.17.0 +sniffio==1.3.1 +stack-data==0.6.3 +starlette==0.46.2 +tomlkit==0.13.2 +tqdm==4.67.1 +traitlets==5.14.3 +typer==0.15.2 +typing-inspection==0.4.0 +typing_extensions==4.13.2 +tzdata==2025.2 +urllib3==2.4.0 +uvicorn==0.34.1 +wcwidth==0.2.13 +websockets==15.0.1 diff --git a/week2/community-contributions/muawiya/tools.py b/week2/community-contributions/muawiya/tools.py new file mode 100644 index 0000000..28752c3 --- /dev/null +++ b/week2/community-contributions/muawiya/tools.py @@ -0,0 +1,57 @@ +# Let's start by making a useful function + +ticket_prices = 
{"london": "$799", "paris": "$899", "tokyo": "$1400", "berlin": "$499"} + +def get_ticket_price(destination_city): + print(f"Tool get_ticket_price called for {destination_city}") + city = destination_city.lower() + return ticket_prices.get(city, "Unknown") + +def make_a_booking(destination_city, customer_name, customer_id): + print(f"Tool make_a_booking called for {destination_city}") + city = destination_city.lower() + print(f"Customer name: {customer_name}, Customer ID: {customer_id}") + return True + +# There's a particular dictionary structure that's required to describe our function: + +price_function = { + "name": "get_ticket_price", + "description": "Get the price of a return ticket to the destination city. Call this whenever you need to know the ticket price, for example when a customer asks 'How much is a ticket to this city'", + "parameters": { + "type": "object", + "properties": { + "destination_city": { + "type": "string", + "description": "The city that the customer wants to travel to", + }, + }, + "required": ["destination_city"], + "additionalProperties": False + } +} + +booking_function = { + "name": "make_a_booking", + "description": "Make a booking for a customer to a destination city. Call this when a customer wants to book a flight. You can get the customer's name and ID by directly asking the customer. For example, you can say 'What is your name?' or 'What is your ID?'", + "parameters": { + "type": "object", + "properties": { + "destination_city": { + "type": "string", + "description": "The city that the customer wants to travel to", + }, + "customer_name": { + "type": "string", + "description": "The name of the customer making the booking", + }, + "customer_id": { + "type": "string", + "description": "The unique identifier for the customer", + } + }, + "required": ["destination_city", "customer_name", "customer_id"], + "additionalProperties": False + } +} +