Bootcamp Week 2: Add Meeting Minutes Creator with transcription and generation features

This commit is contained in:
Mohamed Salah
2025-10-27 12:55:06 +03:00
parent 8faff0283b
commit e8cfa78499
3 changed files with 346 additions and 0 deletions


@@ -0,0 +1,301 @@
#!/usr/bin/env python3
import os
import torch
import requests
import json
import librosa
import numpy as np
from pathlib import Path
from datetime import datetime
from transformers import pipeline
import gradio as gr
# Basic config
TRANSCRIPTION_MODEL = "openai/whisper-tiny.en"
OLLAMA_MODEL = "llama3.2:latest"
OLLAMA_URL = "http://localhost:11434"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
OUTPUT_DIR = Path("./output")
# ============================
# MODEL LOADING
# ============================
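# Ping the local Ollama server and check that the configured model has already been pulled.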
def check_ollama():
    try:
        response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
        if response.status_code == 200:
            models = response.json().get('models', [])
            model_names = [model['name'] for model in models]
            return OLLAMA_MODEL in model_names
        return False
    except requests.RequestException:
        return False
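# Send a single non-streaming generation request to Ollama and return the raw response text.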
def call_ollama(prompt):
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": 0.7,
            "num_predict": 1000
        }
    }
    try:
        response = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=120)
        if response.status_code == 200:
            return response.json().get('response', '').strip()
        return "Error: Ollama request failed"
    except requests.RequestException:
        return "Error: Could not connect to Ollama"
def load_models():
print("Loading models...")
if not check_ollama():
print("Ollama not available")
return None, False
try:
transcription_pipe = pipeline(
"automatic-speech-recognition",
model=TRANSCRIPTION_MODEL,
torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
device=0 if DEVICE == "cuda" else -1,
return_timestamps=True
)
print("Models loaded successfully")
return transcription_pipe, True
except Exception as e:
print(f"Failed to load models: {e}")
return None, False
# ============================
# PROCESSING FUNCTIONS
# ============================
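# Whisper models expect 16 kHz mono audio, so librosa resamples the file on load.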
def transcribe_audio(audio_file_path, transcription_pipe):
    if not os.path.exists(audio_file_path):
        return "Error: Audio file not found"
    try:
        # Load audio with librosa
        audio, sr = librosa.load(audio_file_path, sr=16000)
        if not isinstance(audio, np.ndarray):
            audio = np.array(audio)
        result = transcription_pipe(audio)
        # Extract text from result
        if isinstance(result, dict):
            if "text" in result:
                transcription = result["text"].strip()
            elif "chunks" in result:
                transcription = " ".join([chunk["text"] for chunk in result["chunks"]]).strip()
            else:
                transcription = str(result).strip()
        else:
            transcription = str(result).strip()
        return transcription
    except Exception as e:
        return f"Error: {str(e)}"
def generate_minutes(transcription):
prompt = f"""Create meeting minutes from this transcript:
{transcription[:2000]}
Include:
- Summary with attendees and topics
- Key discussion points
- Important decisions
- Action items
Meeting Minutes:"""
result = call_ollama(prompt)
return result
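# Write the minutes and the full transcript to a timestamped Markdown file under ./output.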
def save_results(transcription, minutes, meeting_type="meeting"):
    try:
        OUTPUT_DIR.mkdir(exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{meeting_type}_minutes_{timestamp}.md"
        filepath = OUTPUT_DIR / filename
        content = f"""# Meeting Minutes
**Generated:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
## Meeting Minutes
{minutes}
## Full Transcription
{transcription}
"""
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(content)
        return str(filepath)
    except Exception as e:
        return f"Error saving: {str(e)}"
# ============================
# GRADIO INTERFACE
# ============================
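# End-to-end handler for the Gradio button. The loaded models are attached to this
# function as an attribute by main() so they are created only once.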
def process_audio_file(audio_file, meeting_type, progress=gr.Progress()):
    progress(0.0, desc="Starting...")
    if not hasattr(process_audio_file, 'models') or not process_audio_file.models[0]:
        return "", "", "Models not loaded"
    transcription_pipe, ollama_ready = process_audio_file.models
    if not ollama_ready:
        return "", "", "Ollama not available"
    try:
        if audio_file is None:
            return "", "", "No audio file provided"
        audio_path = audio_file.name if hasattr(audio_file, 'name') else str(audio_file)
        progress(0.2, desc="Transcribing...")
        transcription = transcribe_audio(audio_path, transcription_pipe)
        if transcription.startswith("Error:"):
            return transcription, "", "Transcription failed"
        progress(0.6, desc="Generating minutes...")
        minutes = generate_minutes(transcription)
        if minutes.startswith("Error:"):
            return transcription, minutes, "Minutes generation failed"
        progress(0.9, desc="Saving...")
        save_path = save_results(transcription, minutes, meeting_type)
        progress(1.0, desc="Complete!")
        status = f"""Processing completed!
Transcription: {len(transcription)} characters
Minutes: {len(minutes)} characters
Saved to: {save_path}
Models used:
- Transcription: {TRANSCRIPTION_MODEL}
- LLM: {OLLAMA_MODEL}
- Device: {DEVICE}
"""
        return transcription, minutes, status
    except Exception as e:
        progress(1.0, desc="Failed")
        return "", "", f"Processing failed: {str(e)}"
def create_interface():
    with gr.Blocks(title="Meeting Minutes Creator") as interface:
        gr.HTML("<h1>Meeting Minutes Creator</h1><p>HuggingFace Whisper + Ollama</p>")
        with gr.Row():
            with gr.Column():
                gr.Markdown("### Audio Input")
                audio_input = gr.Audio(
                    label="Upload or Record Audio",
                    type="filepath",
                    sources=["upload", "microphone"]
                )
                meeting_type = gr.Dropdown(
                    choices=["meeting", "standup", "interview", "call"],
                    value="meeting",
                    label="Meeting Type"
                )
                process_btn = gr.Button("Generate Minutes", variant="primary")
                gr.HTML(f"""
                <div>
                    <h4>Configuration</h4>
                    <ul>
                        <li>Transcription: {TRANSCRIPTION_MODEL}</li>
                        <li>LLM: {OLLAMA_MODEL}</li>
                        <li>Device: {DEVICE}</li>
                    </ul>
                </div>
                """)
            with gr.Column():
                gr.Markdown("### Results")
                status_output = gr.Markdown("Ready to process audio")
                with gr.Tabs():
                    with gr.Tab("Meeting Minutes"):
                        minutes_output = gr.Markdown("Minutes will appear here")
                    with gr.Tab("Transcription"):
                        transcription_output = gr.Textbox(
                            "Transcription will appear here",
                            lines=15,
                            show_copy_button=True
                        )
        process_btn.click(
            fn=process_audio_file,
            inputs=[audio_input, meeting_type],
            outputs=[transcription_output, minutes_output, status_output],
            show_progress=True
        )
    return interface
# ============================
# MAIN APPLICATION
# ============================
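# Startup order: verify Ollama, load the Whisper pipeline, stash the models, then launch the UI.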
def main():
print("Meeting Minutes Creator - HuggingFace + Ollama")
print("Loading models...")
transcription_pipe, ollama_ready = load_models()
if not transcription_pipe or not ollama_ready:
print("Failed to load models or connect to Ollama")
print("Make sure Ollama is running and has the model available")
return
process_audio_file.models = (transcription_pipe, ollama_ready)
print("Models loaded successfully!")
print("Starting web interface...")
print("Access at: http://localhost:7860")
interface = create_interface()
try:
interface.launch(
server_name="localhost",
server_port=7860,
debug=False
)
except KeyboardInterrupt:
print("Shutting down...")
except Exception as e:
print(f"Failed to launch: {e}")
if __name__ == "__main__":
main()


@@ -0,0 +1,36 @@
# Meeting Minutes
**Generated:** 2025-10-24 06:26:09
## Meeting Minutes
Here are the meeting minutes based on the transcript:
**Dilistanda Meeting Minutes - October 24**
**Attendees:**
* Jean (Project Manager)
* [Unknown speaker] ( attendee, name not provided)
**Summary:**
This meeting was held to discuss ongoing project updates and tasks for Dilistanda. The attendees reviewed the progress made by Jean on the user authentication module and discussed other ongoing work.
**Key Discussion Points:**
* Jean shared his update on completing the user authentication module and fixing three bugs on the login system.
* [Unknown speaker] mentioned they finished a database migration script and reviewed SORAP or request, but did not provide further details.
**Important Decisions:**
None
**Action Items:**
1. **Jean:** Continue working on the dashboard to components without any blockers.
2. [Unknown speaker]: Focus on API points for mobile app development.
Note: Unfortunately, some information was missing from the transcript (e.g., the identity of the second attendee), which made it challenging to create a comprehensive set of meeting minutes.
## Full Transcription
Good morning everyone, this is our Dilistanda meeting for October 24. I am sorrow as a project manager. Jean, can you give us your update? Yeah, Jean here yesterday I completed the user authentication module and I fixed three bugs on the login system. Today I will be working on the dashboard to components, no blocker. Okay, so I'm going to make your turn. How is this mic? I finished the database migration script and I reviewed SORAP or request. Today I will focus on the API points for mobile app.


@@ -0,0 +1,9 @@
# Meeting Minutes Creator V2 - HuggingFace + Ollama Implementation
# Requirements for Week 3 Day 5 Exercise
torch>=2.0.0
transformers>=4.35.0
gradio>=4.0.0
librosa>=0.10.0
soundfile>=0.12.0
requests>=2.31.0