diff --git a/week3/community-contributions/salah/meeting_minutes_v2.py b/week3/community-contributions/salah/meeting_minutes_v2.py
new file mode 100644
index 0000000..2a1adc2
--- /dev/null
+++ b/week3/community-contributions/salah/meeting_minutes_v2.py
@@ -0,0 +1,301 @@
+#!/usr/bin/env python3
+
+import os
+import torch
+import requests
+import json
+import librosa
+import numpy as np
+from pathlib import Path
+from datetime import datetime
+from transformers import pipeline
+import gradio as gr
+
+# Basic config
+TRANSCRIPTION_MODEL = "openai/whisper-tiny.en"
+OLLAMA_MODEL = "llama3.2:latest"
+OLLAMA_URL = "http://localhost:11434"
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+OUTPUT_DIR = Path("./output")
+
+# ============================
+# MODEL LOADING
+# ============================
+
+def check_ollama():
+    try:
+        response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
+        if response.status_code == 200:
+            models = response.json().get('models', [])
+            model_names = [model['name'] for model in models]
+            return OLLAMA_MODEL in model_names
+        return False
+    except Exception:
+        return False
+
+def call_ollama(prompt):
+    payload = {
+        "model": OLLAMA_MODEL,
+        "prompt": prompt,
+        "stream": False,
+        "options": {
+            "temperature": 0.7,
+            "num_predict": 1000  # cap the number of generated tokens
+        }
+    }
+
+    try:
+        response = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=120)
+        if response.status_code == 200:
+            return response.json().get('response', '').strip()
+        return "Error: Ollama request failed"
+    except Exception:
+        return "Error: Could not connect to Ollama"
+
+def load_models():
+    print("Loading models...")
+
+    if not check_ollama():
+        print("Ollama not available")
+        return None, False
+
+    try:
+        transcription_pipe = pipeline(
+            "automatic-speech-recognition",
+            model=TRANSCRIPTION_MODEL,
+            torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,  # fp16 on GPU, fp32 on CPU
+            device=0 if DEVICE == "cuda" else -1,
+            return_timestamps=True
+        )
+        print("Models loaded successfully")
+        return transcription_pipe, True
+    except Exception as e:
+        print(f"Failed to load models: {e}")
+        return None, False
+
+# ============================
+# PROCESSING FUNCTIONS
+# ============================
+
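+# Note: Whisper checkpoints are trained on 16 kHz mono audio, so the loader
+# below resamples with librosa. With return_timestamps=True the pipeline
+# result may carry a "chunks" list of timestamped segments alongside "text".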
+def transcribe_audio(audio_file_path, transcription_pipe):
+    if not os.path.exists(audio_file_path):
+        return "Error: Audio file not found"
+
+    try:
+        # Load audio with librosa, resampled to 16 kHz for Whisper
+        audio, sr = librosa.load(audio_file_path, sr=16000)
+        if not isinstance(audio, np.ndarray):
+            audio = np.array(audio)
+
+        result = transcription_pipe(audio)
+
+        # Extract text from the result, whichever shape the pipeline returned
+        if isinstance(result, dict):
+            if "text" in result:
+                transcription = result["text"].strip()
+            elif "chunks" in result:
+                transcription = " ".join([chunk["text"] for chunk in result["chunks"]]).strip()
+            else:
+                transcription = str(result).strip()
+        else:
+            transcription = str(result).strip()
+
+        return transcription
+
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+def generate_minutes(transcription):
+    prompt = f"""Create meeting minutes from this transcript:
+
+{transcription[:2000]}
+
+Include:
+- Summary with attendees and topics
+- Key discussion points
+- Important decisions
+- Action items
+
+Meeting Minutes:"""
+
+    result = call_ollama(prompt)
+    return result
+
+def save_results(transcription, minutes, meeting_type="meeting"):
+    try:
+        OUTPUT_DIR.mkdir(exist_ok=True)
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        filename = f"{meeting_type}_minutes_{timestamp}.md"
+        filepath = OUTPUT_DIR / filename
+
+        content = f"""# Meeting Minutes
+
+**Generated:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
+
+## Minutes
+
+{minutes}
+
+## Full Transcription
+
+{transcription}
+"""
+
+        with open(filepath, 'w', encoding='utf-8') as f:
+            f.write(content)
+
+        return str(filepath)
+
+    except Exception as e:
+        return f"Error saving: {str(e)}"
+
+# ============================
+# GRADIO INTERFACE
+# ============================
+
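+# The callback reads the loaded pipeline and Ollama status from a function
+# attribute (process_audio_file.models); startup code is expected to set it
+# once load_models() succeeds.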
+def process_audio_file(audio_file, meeting_type, progress=gr.Progress()):
+    progress(0.0, desc="Starting...")
+
+    if not hasattr(process_audio_file, 'models') or not process_audio_file.models[0]:
+        return "", "", "Models not loaded"
+
+    transcription_pipe, ollama_ready = process_audio_file.models
+
+    if not ollama_ready:
+        return "", "", "Ollama not available"
+
+    try:
+        audio_path = audio_file.name if hasattr(audio_file, 'name') else str(audio_file)
+        if not audio_path:
+            return "", "", "No audio file provided"
+
+        progress(0.2, desc="Transcribing...")
+        transcription = transcribe_audio(audio_path, transcription_pipe)
+
+        if transcription.startswith("Error:"):
+            return transcription, "", "Transcription failed"
+
+        progress(0.6, desc="Generating minutes...")
+        minutes = generate_minutes(transcription)
+
+        if minutes.startswith("Error:"):
+            return transcription, minutes, "Minutes generation failed"
+
+        progress(0.9, desc="Saving...")
+        save_path = save_results(transcription, minutes, meeting_type)
+
+        progress(1.0, desc="Complete!")
+
+        status = f"""Processing completed!
+
+Transcription: {len(transcription)} characters
+Minutes: {len(minutes)} characters
+Saved to: {save_path}
+
+Models used:
+- Transcription: {TRANSCRIPTION_MODEL}
+- LLM: {OLLAMA_MODEL}
+- Device: {DEVICE}
+"""
+
+        return transcription, minutes, status
+
+    except Exception as e:
+        progress(1.0, desc="Failed")
+        return "", "", f"Processing failed: {str(e)}"
+
+def create_interface():
+    with gr.Blocks(title="Meeting Minutes Creator") as interface:
+
+        gr.HTML("<h1>Meeting Minutes Creator</h1><p>HuggingFace Whisper + Ollama</p>")
+
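+        # Input controls: upload or record audio, pick a meeting type, then
+        # trigger the transcribe -> minutes -> save flow with the button.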
") + + with gr.Row(): + with gr.Column(): + gr.Markdown("### Audio Input") + + audio_input = gr.Audio( + label="Upload or Record Audio", + type="filepath", + sources=["upload", "microphone"] + ) + + meeting_type = gr.Dropdown( + choices=["meeting", "standup", "interview", "call"], + value="meeting", + label="Meeting Type" + ) + + process_btn = gr.Button("Generate Minutes", variant="primary") + + gr.HTML(f""" +